{ "best_metric": 0.3152608573436737, "best_model_checkpoint": "./w2v-bert-2.0-igbo_naijavoices_100h/checkpoint-8000", "epoch": 44.827586206896555, "eval_steps": 1000, "global_step": 13000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0034482758620689655, "grad_norm": Infinity, "learning_rate": 0.0, "loss": 9.8121, "step": 1 }, { "epoch": 0.006896551724137931, "grad_norm": 24.626651763916016, "learning_rate": 4.137931034482759e-09, "loss": 8.9441, "step": 2 }, { "epoch": 0.010344827586206896, "grad_norm": 23.451107025146484, "learning_rate": 8.275862068965518e-09, "loss": 8.5023, "step": 3 }, { "epoch": 0.013793103448275862, "grad_norm": 21.609752655029297, "learning_rate": 1.2413793103448276e-08, "loss": 7.9538, "step": 4 }, { "epoch": 0.017241379310344827, "grad_norm": 22.181669235229492, "learning_rate": 1.6551724137931037e-08, "loss": 8.0036, "step": 5 }, { "epoch": 0.020689655172413793, "grad_norm": 21.87883186340332, "learning_rate": 2.0689655172413793e-08, "loss": 7.9148, "step": 6 }, { "epoch": 0.02413793103448276, "grad_norm": 21.324798583984375, "learning_rate": 2.4827586206896552e-08, "loss": 7.7875, "step": 7 }, { "epoch": 0.027586206896551724, "grad_norm": 21.67850112915039, "learning_rate": 2.896551724137931e-08, "loss": 7.8451, "step": 8 }, { "epoch": 0.03103448275862069, "grad_norm": 20.160188674926758, "learning_rate": 3.3103448275862073e-08, "loss": 7.373, "step": 9 }, { "epoch": 0.034482758620689655, "grad_norm": 20.83883285522461, "learning_rate": 3.724137931034483e-08, "loss": 7.6019, "step": 10 }, { "epoch": 0.03793103448275862, "grad_norm": 20.49580192565918, "learning_rate": 4.1379310344827585e-08, "loss": 7.4225, "step": 11 }, { "epoch": 0.041379310344827586, "grad_norm": 20.22064971923828, "learning_rate": 4.5517241379310344e-08, "loss": 7.3817, "step": 12 }, { "epoch": 0.04482758620689655, "grad_norm": 20.65193748474121, "learning_rate": 4.9655172413793103e-08, "loss": 7.5033, "step": 13 }, { "epoch": 0.04827586206896552, "grad_norm": 20.91577911376953, "learning_rate": 5.379310344827586e-08, "loss": 7.4481, "step": 14 }, { "epoch": 0.05172413793103448, "grad_norm": 19.879791259765625, "learning_rate": 5.793103448275862e-08, "loss": 7.2069, "step": 15 }, { "epoch": 0.05517241379310345, "grad_norm": 20.917402267456055, "learning_rate": 6.206896551724139e-08, "loss": 7.374, "step": 16 }, { "epoch": 0.05862068965517241, "grad_norm": 20.588125228881836, "learning_rate": 6.620689655172415e-08, "loss": 7.3134, "step": 17 }, { "epoch": 0.06206896551724138, "grad_norm": 19.97978401184082, "learning_rate": 7.03448275862069e-08, "loss": 7.1184, "step": 18 }, { "epoch": 0.06551724137931035, "grad_norm": 21.11067771911621, "learning_rate": 7.448275862068966e-08, "loss": 7.4176, "step": 19 }, { "epoch": 0.06896551724137931, "grad_norm": 20.40967559814453, "learning_rate": 7.862068965517242e-08, "loss": 7.2113, "step": 20 }, { "epoch": 0.07241379310344828, "grad_norm": 20.008590698242188, "learning_rate": 8.275862068965517e-08, "loss": 7.0588, "step": 21 }, { "epoch": 0.07586206896551724, "grad_norm": 21.014400482177734, "learning_rate": 8.689655172413793e-08, "loss": 7.2412, "step": 22 }, { "epoch": 0.07931034482758621, "grad_norm": 20.204877853393555, "learning_rate": 9.103448275862069e-08, "loss": 7.0302, "step": 23 }, { "epoch": 0.08275862068965517, "grad_norm": 20.265783309936523, "learning_rate": 9.517241379310345e-08, "loss": 6.9927, "step": 24 }, { "epoch": 0.08620689655172414, "grad_norm": 19.22053337097168, "learning_rate": 9.931034482758621e-08, "loss": 6.6829, "step": 25 }, { "epoch": 0.0896551724137931, "grad_norm": 27.42808723449707, "learning_rate": 1.0344827586206897e-07, "loss": 9.6154, "step": 26 }, { "epoch": 0.09310344827586207, "grad_norm": 24.576560974121094, "learning_rate": 1.0758620689655173e-07, "loss": 8.6466, "step": 27 }, { "epoch": 0.09655172413793103, "grad_norm": 23.48551368713379, "learning_rate": 1.1172413793103448e-07, "loss": 8.287, "step": 28 }, { "epoch": 0.1, "grad_norm": 23.578935623168945, "learning_rate": 1.1586206896551724e-07, "loss": 8.2651, "step": 29 }, { "epoch": 0.10344827586206896, "grad_norm": 22.132747650146484, "learning_rate": 1.2000000000000002e-07, "loss": 7.8535, "step": 30 }, { "epoch": 0.10689655172413794, "grad_norm": 22.648555755615234, "learning_rate": 1.2413793103448277e-07, "loss": 7.9494, "step": 31 }, { "epoch": 0.1103448275862069, "grad_norm": 22.280717849731445, "learning_rate": 1.2827586206896553e-07, "loss": 7.7172, "step": 32 }, { "epoch": 0.11379310344827587, "grad_norm": 21.27153205871582, "learning_rate": 1.324137931034483e-07, "loss": 7.4466, "step": 33 }, { "epoch": 0.11724137931034483, "grad_norm": 21.670337677001953, "learning_rate": 1.3655172413793105e-07, "loss": 7.5141, "step": 34 }, { "epoch": 0.1206896551724138, "grad_norm": 21.298511505126953, "learning_rate": 1.406896551724138e-07, "loss": 7.3662, "step": 35 }, { "epoch": 0.12413793103448276, "grad_norm": 20.90970230102539, "learning_rate": 1.4482758620689657e-07, "loss": 7.2314, "step": 36 }, { "epoch": 0.12758620689655173, "grad_norm": 20.692155838012695, "learning_rate": 1.4896551724137933e-07, "loss": 7.1799, "step": 37 }, { "epoch": 0.1310344827586207, "grad_norm": 20.74738311767578, "learning_rate": 1.531034482758621e-07, "loss": 7.164, "step": 38 }, { "epoch": 0.13448275862068965, "grad_norm": 21.784286499023438, "learning_rate": 1.5724137931034485e-07, "loss": 7.3596, "step": 39 }, { "epoch": 0.13793103448275862, "grad_norm": 20.660402297973633, "learning_rate": 1.613793103448276e-07, "loss": 7.0806, "step": 40 }, { "epoch": 0.1413793103448276, "grad_norm": 20.645362854003906, "learning_rate": 1.6551724137931034e-07, "loss": 7.0195, "step": 41 }, { "epoch": 0.14482758620689656, "grad_norm": 21.143461227416992, "learning_rate": 1.696551724137931e-07, "loss": 7.1226, "step": 42 }, { "epoch": 0.1482758620689655, "grad_norm": 20.844785690307617, "learning_rate": 1.7379310344827586e-07, "loss": 6.9369, "step": 43 }, { "epoch": 0.15172413793103448, "grad_norm": 21.461313247680664, "learning_rate": 1.7793103448275862e-07, "loss": 7.0857, "step": 44 }, { "epoch": 0.15517241379310345, "grad_norm": 22.07090950012207, "learning_rate": 1.8206896551724138e-07, "loss": 7.149, "step": 45 }, { "epoch": 0.15862068965517243, "grad_norm": 21.32610511779785, "learning_rate": 1.8620689655172414e-07, "loss": 6.9698, "step": 46 }, { "epoch": 0.16206896551724137, "grad_norm": 22.36414337158203, "learning_rate": 1.903448275862069e-07, "loss": 7.1207, "step": 47 }, { "epoch": 0.16551724137931034, "grad_norm": 21.387327194213867, "learning_rate": 1.9448275862068965e-07, "loss": 6.8794, "step": 48 }, { "epoch": 0.16896551724137931, "grad_norm": 20.94029426574707, "learning_rate": 1.9862068965517241e-07, "loss": 6.7364, "step": 49 }, { "epoch": 0.1724137931034483, "grad_norm": 21.65522003173828, "learning_rate": 2.0275862068965517e-07, "loss": 6.7998, "step": 50 }, { "epoch": 0.17586206896551723, "grad_norm": 29.565858840942383, "learning_rate": 2.0689655172413793e-07, "loss": 9.2156, "step": 51 }, { "epoch": 0.1793103448275862, "grad_norm": 27.323368072509766, "learning_rate": 2.110344827586207e-07, "loss": 8.5075, "step": 52 }, { "epoch": 0.18275862068965518, "grad_norm": 24.66246223449707, "learning_rate": 2.1517241379310345e-07, "loss": 7.8436, "step": 53 }, { "epoch": 0.18620689655172415, "grad_norm": 25.4600830078125, "learning_rate": 2.193103448275862e-07, "loss": 7.9175, "step": 54 }, { "epoch": 0.1896551724137931, "grad_norm": 24.19964599609375, "learning_rate": 2.2344827586206897e-07, "loss": 7.5676, "step": 55 }, { "epoch": 0.19310344827586207, "grad_norm": 24.364341735839844, "learning_rate": 2.2758620689655173e-07, "loss": 7.5834, "step": 56 }, { "epoch": 0.19655172413793104, "grad_norm": 24.353649139404297, "learning_rate": 2.3172413793103449e-07, "loss": 7.4983, "step": 57 }, { "epoch": 0.2, "grad_norm": 23.747594833374023, "learning_rate": 2.3586206896551722e-07, "loss": 7.324, "step": 58 }, { "epoch": 0.20344827586206896, "grad_norm": 24.316612243652344, "learning_rate": 2.4000000000000003e-07, "loss": 7.3725, "step": 59 }, { "epoch": 0.20689655172413793, "grad_norm": 23.510217666625977, "learning_rate": 2.4413793103448274e-07, "loss": 7.1696, "step": 60 }, { "epoch": 0.2103448275862069, "grad_norm": 22.83513641357422, "learning_rate": 2.4827586206896555e-07, "loss": 7.0418, "step": 61 }, { "epoch": 0.21379310344827587, "grad_norm": 23.83818244934082, "learning_rate": 2.5241379310344826e-07, "loss": 7.1425, "step": 62 }, { "epoch": 0.21724137931034482, "grad_norm": 23.67046546936035, "learning_rate": 2.5655172413793107e-07, "loss": 7.0516, "step": 63 }, { "epoch": 0.2206896551724138, "grad_norm": 23.752742767333984, "learning_rate": 2.606896551724138e-07, "loss": 7.0174, "step": 64 }, { "epoch": 0.22413793103448276, "grad_norm": 24.10784149169922, "learning_rate": 2.648275862068966e-07, "loss": 7.053, "step": 65 }, { "epoch": 0.22758620689655173, "grad_norm": 23.517871856689453, "learning_rate": 2.689655172413793e-07, "loss": 6.9443, "step": 66 }, { "epoch": 0.23103448275862068, "grad_norm": 23.52369499206543, "learning_rate": 2.731034482758621e-07, "loss": 6.8284, "step": 67 }, { "epoch": 0.23448275862068965, "grad_norm": 23.79762840270996, "learning_rate": 2.772413793103448e-07, "loss": 6.834, "step": 68 }, { "epoch": 0.23793103448275862, "grad_norm": 24.492591857910156, "learning_rate": 2.813793103448276e-07, "loss": 6.8944, "step": 69 }, { "epoch": 0.2413793103448276, "grad_norm": 24.49434471130371, "learning_rate": 2.8551724137931033e-07, "loss": 6.854, "step": 70 }, { "epoch": 0.24482758620689654, "grad_norm": 23.685287475585938, "learning_rate": 2.8965517241379314e-07, "loss": 6.6818, "step": 71 }, { "epoch": 0.2482758620689655, "grad_norm": 23.467649459838867, "learning_rate": 2.9379310344827585e-07, "loss": 6.5804, "step": 72 }, { "epoch": 0.2517241379310345, "grad_norm": 24.071460723876953, "learning_rate": 2.9793103448275866e-07, "loss": 6.6497, "step": 73 }, { "epoch": 0.25517241379310346, "grad_norm": 22.845537185668945, "learning_rate": 3.0206896551724137e-07, "loss": 6.396, "step": 74 }, { "epoch": 0.25862068965517243, "grad_norm": 23.36939239501953, "learning_rate": 3.062068965517242e-07, "loss": 6.3966, "step": 75 }, { "epoch": 0.2620689655172414, "grad_norm": Infinity, "learning_rate": 3.062068965517242e-07, "loss": 8.5372, "step": 76 }, { "epoch": 0.2655172413793103, "grad_norm": 32.277339935302734, "learning_rate": 3.103448275862069e-07, "loss": 8.0929, "step": 77 }, { "epoch": 0.2689655172413793, "grad_norm": 30.424768447875977, "learning_rate": 3.144827586206897e-07, "loss": 7.6649, "step": 78 }, { "epoch": 0.27241379310344827, "grad_norm": 28.959487915039062, "learning_rate": 3.186206896551724e-07, "loss": 7.3749, "step": 79 }, { "epoch": 0.27586206896551724, "grad_norm": 28.744243621826172, "learning_rate": 3.227586206896552e-07, "loss": 7.2828, "step": 80 }, { "epoch": 0.2793103448275862, "grad_norm": 28.36699104309082, "learning_rate": 3.268965517241379e-07, "loss": 7.1666, "step": 81 }, { "epoch": 0.2827586206896552, "grad_norm": 28.825965881347656, "learning_rate": 3.310344827586207e-07, "loss": 7.1132, "step": 82 }, { "epoch": 0.28620689655172415, "grad_norm": 28.28879737854004, "learning_rate": 3.3517241379310344e-07, "loss": 6.99, "step": 83 }, { "epoch": 0.2896551724137931, "grad_norm": 27.990652084350586, "learning_rate": 3.393103448275862e-07, "loss": 6.9023, "step": 84 }, { "epoch": 0.29310344827586204, "grad_norm": 27.05352210998535, "learning_rate": 3.4344827586206896e-07, "loss": 6.7275, "step": 85 }, { "epoch": 0.296551724137931, "grad_norm": 27.072874069213867, "learning_rate": 3.475862068965517e-07, "loss": 6.6299, "step": 86 }, { "epoch": 0.3, "grad_norm": 27.096595764160156, "learning_rate": 3.517241379310345e-07, "loss": 6.6129, "step": 87 }, { "epoch": 0.30344827586206896, "grad_norm": 28.595806121826172, "learning_rate": 3.5586206896551724e-07, "loss": 6.7487, "step": 88 }, { "epoch": 0.30689655172413793, "grad_norm": 28.768007278442383, "learning_rate": 3.6e-07, "loss": 6.7181, "step": 89 }, { "epoch": 0.3103448275862069, "grad_norm": 28.124353408813477, "learning_rate": 3.6413793103448275e-07, "loss": 6.5572, "step": 90 }, { "epoch": 0.3137931034482759, "grad_norm": 26.46561050415039, "learning_rate": 3.682758620689655e-07, "loss": 6.327, "step": 91 }, { "epoch": 0.31724137931034485, "grad_norm": 27.72374725341797, "learning_rate": 3.7241379310344827e-07, "loss": 6.4075, "step": 92 }, { "epoch": 0.32068965517241377, "grad_norm": 27.299312591552734, "learning_rate": 3.7655172413793103e-07, "loss": 6.2929, "step": 93 }, { "epoch": 0.32413793103448274, "grad_norm": 27.183692932128906, "learning_rate": 3.806896551724138e-07, "loss": 6.243, "step": 94 }, { "epoch": 0.3275862068965517, "grad_norm": 27.85772705078125, "learning_rate": 3.8482758620689655e-07, "loss": 6.2658, "step": 95 }, { "epoch": 0.3310344827586207, "grad_norm": 28.406217575073242, "learning_rate": 3.889655172413793e-07, "loss": 6.2642, "step": 96 }, { "epoch": 0.33448275862068966, "grad_norm": 26.223026275634766, "learning_rate": 3.931034482758621e-07, "loss": 5.988, "step": 97 }, { "epoch": 0.33793103448275863, "grad_norm": 27.620248794555664, "learning_rate": 3.9724137931034483e-07, "loss": 6.066, "step": 98 }, { "epoch": 0.3413793103448276, "grad_norm": 27.319509506225586, "learning_rate": 4.0137931034482764e-07, "loss": 6.0081, "step": 99 }, { "epoch": 0.3448275862068966, "grad_norm": 25.674129486083984, "learning_rate": 4.0551724137931035e-07, "loss": 5.7813, "step": 100 }, { "epoch": 0.3482758620689655, "grad_norm": 46.45396423339844, "learning_rate": 4.0965517241379316e-07, "loss": 8.0018, "step": 101 }, { "epoch": 0.35172413793103446, "grad_norm": 41.307186126708984, "learning_rate": 4.1379310344827586e-07, "loss": 7.3813, "step": 102 }, { "epoch": 0.35517241379310344, "grad_norm": 36.86460876464844, "learning_rate": 4.179310344827587e-07, "loss": 6.8599, "step": 103 }, { "epoch": 0.3586206896551724, "grad_norm": 36.31233215332031, "learning_rate": 4.220689655172414e-07, "loss": 6.7307, "step": 104 }, { "epoch": 0.3620689655172414, "grad_norm": 36.56094741821289, "learning_rate": 4.262068965517242e-07, "loss": 6.6828, "step": 105 }, { "epoch": 0.36551724137931035, "grad_norm": 32.29682922363281, "learning_rate": 4.303448275862069e-07, "loss": 6.2593, "step": 106 }, { "epoch": 0.3689655172413793, "grad_norm": 32.42371368408203, "learning_rate": 4.344827586206897e-07, "loss": 6.2091, "step": 107 }, { "epoch": 0.3724137931034483, "grad_norm": 33.37789535522461, "learning_rate": 4.386206896551724e-07, "loss": 6.2188, "step": 108 }, { "epoch": 0.3758620689655172, "grad_norm": 32.41092300415039, "learning_rate": 4.427586206896552e-07, "loss": 6.0808, "step": 109 }, { "epoch": 0.3793103448275862, "grad_norm": 33.27195358276367, "learning_rate": 4.4689655172413794e-07, "loss": 6.0988, "step": 110 }, { "epoch": 0.38275862068965516, "grad_norm": 31.98866081237793, "learning_rate": 4.510344827586207e-07, "loss": 5.9503, "step": 111 }, { "epoch": 0.38620689655172413, "grad_norm": 31.832658767700195, "learning_rate": 4.5517241379310346e-07, "loss": 5.8734, "step": 112 }, { "epoch": 0.3896551724137931, "grad_norm": 31.5836238861084, "learning_rate": 4.593103448275862e-07, "loss": 5.8088, "step": 113 }, { "epoch": 0.3931034482758621, "grad_norm": 31.965721130371094, "learning_rate": 4.6344827586206897e-07, "loss": 5.8063, "step": 114 }, { "epoch": 0.39655172413793105, "grad_norm": 33.446598052978516, "learning_rate": 4.6758620689655173e-07, "loss": 5.8372, "step": 115 }, { "epoch": 0.4, "grad_norm": 31.13445281982422, "learning_rate": 4.7172413793103444e-07, "loss": 5.6313, "step": 116 }, { "epoch": 0.40344827586206894, "grad_norm": 30.23342514038086, "learning_rate": 4.758620689655173e-07, "loss": 5.5323, "step": 117 }, { "epoch": 0.4068965517241379, "grad_norm": 29.8189754486084, "learning_rate": 4.800000000000001e-07, "loss": 5.47, "step": 118 }, { "epoch": 0.4103448275862069, "grad_norm": 31.309221267700195, "learning_rate": 4.841379310344827e-07, "loss": 5.4859, "step": 119 }, { "epoch": 0.41379310344827586, "grad_norm": 29.930723190307617, "learning_rate": 4.882758620689655e-07, "loss": 5.3913, "step": 120 }, { "epoch": 0.41724137931034483, "grad_norm": 29.34099578857422, "learning_rate": 4.924137931034483e-07, "loss": 5.3011, "step": 121 }, { "epoch": 0.4206896551724138, "grad_norm": 29.42848777770996, "learning_rate": 4.965517241379311e-07, "loss": 5.2748, "step": 122 }, { "epoch": 0.4241379310344828, "grad_norm": 29.306415557861328, "learning_rate": 5.006896551724138e-07, "loss": 5.2378, "step": 123 }, { "epoch": 0.42758620689655175, "grad_norm": 29.140092849731445, "learning_rate": 5.048275862068965e-07, "loss": 5.1863, "step": 124 }, { "epoch": 0.43103448275862066, "grad_norm": 25.93458366394043, "learning_rate": 5.089655172413794e-07, "loss": 5.0047, "step": 125 }, { "epoch": 0.43448275862068964, "grad_norm": 51.143653869628906, "learning_rate": 5.131034482758621e-07, "loss": 6.4493, "step": 126 }, { "epoch": 0.4379310344827586, "grad_norm": 41.44502639770508, "learning_rate": 5.172413793103448e-07, "loss": 5.8074, "step": 127 }, { "epoch": 0.4413793103448276, "grad_norm": 40.038299560546875, "learning_rate": 5.213793103448275e-07, "loss": 5.6653, "step": 128 }, { "epoch": 0.44482758620689655, "grad_norm": 36.82391357421875, "learning_rate": 5.255172413793104e-07, "loss": 5.4607, "step": 129 }, { "epoch": 0.4482758620689655, "grad_norm": 34.70587921142578, "learning_rate": 5.296551724137932e-07, "loss": 5.2964, "step": 130 }, { "epoch": 0.4517241379310345, "grad_norm": 32.8710823059082, "learning_rate": 5.337931034482758e-07, "loss": 5.1438, "step": 131 }, { "epoch": 0.45517241379310347, "grad_norm": 30.865753173828125, "learning_rate": 5.379310344827586e-07, "loss": 5.0171, "step": 132 }, { "epoch": 0.4586206896551724, "grad_norm": 30.607624053955078, "learning_rate": 5.420689655172415e-07, "loss": 4.9921, "step": 133 }, { "epoch": 0.46206896551724136, "grad_norm": 30.146276473999023, "learning_rate": 5.462068965517242e-07, "loss": 4.9247, "step": 134 }, { "epoch": 0.46551724137931033, "grad_norm": 28.758121490478516, "learning_rate": 5.503448275862069e-07, "loss": 4.8385, "step": 135 }, { "epoch": 0.4689655172413793, "grad_norm": 26.368377685546875, "learning_rate": 5.544827586206896e-07, "loss": 4.7108, "step": 136 }, { "epoch": 0.4724137931034483, "grad_norm": 26.235729217529297, "learning_rate": 5.586206896551724e-07, "loss": 4.6806, "step": 137 }, { "epoch": 0.47586206896551725, "grad_norm": 25.37425994873047, "learning_rate": 5.627586206896552e-07, "loss": 4.6257, "step": 138 }, { "epoch": 0.4793103448275862, "grad_norm": 24.083820343017578, "learning_rate": 5.668965517241379e-07, "loss": 4.5504, "step": 139 }, { "epoch": 0.4827586206896552, "grad_norm": 24.481624603271484, "learning_rate": 5.710344827586207e-07, "loss": 4.5467, "step": 140 }, { "epoch": 0.4862068965517241, "grad_norm": 21.312671661376953, "learning_rate": 5.751724137931034e-07, "loss": 4.4262, "step": 141 }, { "epoch": 0.4896551724137931, "grad_norm": 21.179706573486328, "learning_rate": 5.793103448275863e-07, "loss": 4.3891, "step": 142 }, { "epoch": 0.49310344827586206, "grad_norm": 18.68304443359375, "learning_rate": 5.834482758620689e-07, "loss": 4.3243, "step": 143 }, { "epoch": 0.496551724137931, "grad_norm": 17.834753036499023, "learning_rate": 5.875862068965517e-07, "loss": 4.2921, "step": 144 }, { "epoch": 0.5, "grad_norm": 16.05028533935547, "learning_rate": 5.917241379310345e-07, "loss": 4.25, "step": 145 }, { "epoch": 0.503448275862069, "grad_norm": 16.60947036743164, "learning_rate": 5.958620689655173e-07, "loss": 4.2184, "step": 146 }, { "epoch": 0.506896551724138, "grad_norm": 14.259113311767578, "learning_rate": 6.000000000000001e-07, "loss": 4.1974, "step": 147 }, { "epoch": 0.5103448275862069, "grad_norm": 13.364341735839844, "learning_rate": 6.041379310344827e-07, "loss": 4.1765, "step": 148 }, { "epoch": 0.5137931034482759, "grad_norm": 11.273811340332031, "learning_rate": 6.082758620689655e-07, "loss": 4.1305, "step": 149 }, { "epoch": 0.5172413793103449, "grad_norm": 11.049596786499023, "learning_rate": 6.124137931034484e-07, "loss": 4.1547, "step": 150 }, { "epoch": 0.5206896551724138, "grad_norm": 24.797391891479492, "learning_rate": 6.165517241379311e-07, "loss": 4.33, "step": 151 }, { "epoch": 0.5241379310344828, "grad_norm": 17.39615249633789, "learning_rate": 6.206896551724138e-07, "loss": 4.1762, "step": 152 }, { "epoch": 0.5275862068965518, "grad_norm": 12.710926055908203, "learning_rate": 6.248275862068965e-07, "loss": 4.0763, "step": 153 }, { "epoch": 0.5310344827586206, "grad_norm": 11.422768592834473, "learning_rate": 6.289655172413794e-07, "loss": 4.0537, "step": 154 }, { "epoch": 0.5344827586206896, "grad_norm": 9.516938209533691, "learning_rate": 6.331034482758622e-07, "loss": 4.023, "step": 155 }, { "epoch": 0.5379310344827586, "grad_norm": 9.058573722839355, "learning_rate": 6.372413793103448e-07, "loss": 3.9955, "step": 156 }, { "epoch": 0.5413793103448276, "grad_norm": 9.562417030334473, "learning_rate": 6.413793103448276e-07, "loss": 3.9849, "step": 157 }, { "epoch": 0.5448275862068965, "grad_norm": 10.477831840515137, "learning_rate": 6.455172413793104e-07, "loss": 3.9739, "step": 158 }, { "epoch": 0.5482758620689655, "grad_norm": 11.113611221313477, "learning_rate": 6.496551724137932e-07, "loss": 3.9718, "step": 159 }, { "epoch": 0.5517241379310345, "grad_norm": 13.873380661010742, "learning_rate": 6.537931034482758e-07, "loss": 3.9575, "step": 160 }, { "epoch": 0.5551724137931034, "grad_norm": 11.87230110168457, "learning_rate": 6.579310344827586e-07, "loss": 3.9358, "step": 161 }, { "epoch": 0.5586206896551724, "grad_norm": 12.589128494262695, "learning_rate": 6.620689655172414e-07, "loss": 3.9331, "step": 162 }, { "epoch": 0.5620689655172414, "grad_norm": 11.685302734375, "learning_rate": 6.662068965517242e-07, "loss": 3.9128, "step": 163 }, { "epoch": 0.5655172413793104, "grad_norm": 13.289584159851074, "learning_rate": 6.703448275862069e-07, "loss": 3.8887, "step": 164 }, { "epoch": 0.5689655172413793, "grad_norm": 12.11998462677002, "learning_rate": 6.744827586206896e-07, "loss": 3.882, "step": 165 }, { "epoch": 0.5724137931034483, "grad_norm": 12.002544403076172, "learning_rate": 6.786206896551724e-07, "loss": 3.8506, "step": 166 }, { "epoch": 0.5758620689655173, "grad_norm": 10.782925605773926, "learning_rate": 6.827586206896553e-07, "loss": 3.8235, "step": 167 }, { "epoch": 0.5793103448275863, "grad_norm": 9.925994873046875, "learning_rate": 6.868965517241379e-07, "loss": 3.804, "step": 168 }, { "epoch": 0.5827586206896552, "grad_norm": 9.505733489990234, "learning_rate": 6.910344827586207e-07, "loss": 3.7773, "step": 169 }, { "epoch": 0.5862068965517241, "grad_norm": 9.093132972717285, "learning_rate": 6.951724137931034e-07, "loss": 3.7649, "step": 170 }, { "epoch": 0.5896551724137931, "grad_norm": 8.376246452331543, "learning_rate": 6.993103448275863e-07, "loss": 3.7566, "step": 171 }, { "epoch": 0.593103448275862, "grad_norm": 7.9069671630859375, "learning_rate": 7.03448275862069e-07, "loss": 3.7232, "step": 172 }, { "epoch": 0.596551724137931, "grad_norm": 9.632719993591309, "learning_rate": 7.075862068965517e-07, "loss": 3.7036, "step": 173 }, { "epoch": 0.6, "grad_norm": 7.290523529052734, "learning_rate": 7.117241379310345e-07, "loss": 3.7128, "step": 174 }, { "epoch": 0.603448275862069, "grad_norm": 7.671509742736816, "learning_rate": 7.158620689655173e-07, "loss": 3.7127, "step": 175 }, { "epoch": 0.6068965517241379, "grad_norm": 22.289722442626953, "learning_rate": 7.2e-07, "loss": 3.8346, "step": 176 }, { "epoch": 0.6103448275862069, "grad_norm": 15.810077667236328, "learning_rate": 7.241379310344827e-07, "loss": 3.7209, "step": 177 }, { "epoch": 0.6137931034482759, "grad_norm": 14.73763656616211, "learning_rate": 7.282758620689655e-07, "loss": 3.7046, "step": 178 }, { "epoch": 0.6172413793103448, "grad_norm": 14.284663200378418, "learning_rate": 7.324137931034484e-07, "loss": 3.6892, "step": 179 }, { "epoch": 0.6206896551724138, "grad_norm": 12.460369110107422, "learning_rate": 7.36551724137931e-07, "loss": 3.658, "step": 180 }, { "epoch": 0.6241379310344828, "grad_norm": 9.744416236877441, "learning_rate": 7.406896551724138e-07, "loss": 3.6001, "step": 181 }, { "epoch": 0.6275862068965518, "grad_norm": 7.535118103027344, "learning_rate": 7.448275862068965e-07, "loss": 3.5639, "step": 182 }, { "epoch": 0.6310344827586207, "grad_norm": 7.935444355010986, "learning_rate": 7.489655172413794e-07, "loss": 3.5747, "step": 183 }, { "epoch": 0.6344827586206897, "grad_norm": 5.821645259857178, "learning_rate": 7.531034482758621e-07, "loss": 3.537, "step": 184 }, { "epoch": 0.6379310344827587, "grad_norm": 5.317400932312012, "learning_rate": 7.572413793103448e-07, "loss": 3.5236, "step": 185 }, { "epoch": 0.6413793103448275, "grad_norm": 5.119656085968018, "learning_rate": 7.613793103448276e-07, "loss": 3.5128, "step": 186 }, { "epoch": 0.6448275862068965, "grad_norm": 5.3827033042907715, "learning_rate": 7.655172413793104e-07, "loss": 3.4997, "step": 187 }, { "epoch": 0.6482758620689655, "grad_norm": 5.49559211730957, "learning_rate": 7.696551724137931e-07, "loss": 3.4818, "step": 188 }, { "epoch": 0.6517241379310345, "grad_norm": 6.456326007843018, "learning_rate": 7.737931034482759e-07, "loss": 3.4986, "step": 189 }, { "epoch": 0.6551724137931034, "grad_norm": 6.338305950164795, "learning_rate": 7.779310344827586e-07, "loss": 3.4718, "step": 190 }, { "epoch": 0.6586206896551724, "grad_norm": 6.651682376861572, "learning_rate": 7.820689655172414e-07, "loss": 3.4447, "step": 191 }, { "epoch": 0.6620689655172414, "grad_norm": 6.268693923950195, "learning_rate": 7.862068965517242e-07, "loss": 3.455, "step": 192 }, { "epoch": 0.6655172413793103, "grad_norm": 5.284954071044922, "learning_rate": 7.903448275862069e-07, "loss": 3.4316, "step": 193 }, { "epoch": 0.6689655172413793, "grad_norm": 5.058048725128174, "learning_rate": 7.944827586206897e-07, "loss": 3.4281, "step": 194 }, { "epoch": 0.6724137931034483, "grad_norm": 4.13246488571167, "learning_rate": 7.986206896551724e-07, "loss": 3.4029, "step": 195 }, { "epoch": 0.6758620689655173, "grad_norm": 4.459177494049072, "learning_rate": 8.027586206896553e-07, "loss": 3.3876, "step": 196 }, { "epoch": 0.6793103448275862, "grad_norm": 7.320526123046875, "learning_rate": 8.068965517241379e-07, "loss": 3.373, "step": 197 }, { "epoch": 0.6827586206896552, "grad_norm": 4.002184867858887, "learning_rate": 8.110344827586207e-07, "loss": 3.3759, "step": 198 }, { "epoch": 0.6862068965517242, "grad_norm": 3.871610641479492, "learning_rate": 8.151724137931034e-07, "loss": 3.3539, "step": 199 }, { "epoch": 0.6896551724137931, "grad_norm": 4.039922714233398, "learning_rate": 8.193103448275863e-07, "loss": 3.3827, "step": 200 }, { "epoch": 0.6931034482758621, "grad_norm": 22.213947296142578, "learning_rate": 8.23448275862069e-07, "loss": 3.478, "step": 201 }, { "epoch": 0.696551724137931, "grad_norm": 16.709863662719727, "learning_rate": 8.275862068965517e-07, "loss": 3.4, "step": 202 }, { "epoch": 0.7, "grad_norm": 13.10354995727539, "learning_rate": 8.317241379310345e-07, "loss": 3.3477, "step": 203 }, { "epoch": 0.7034482758620689, "grad_norm": 11.01998519897461, "learning_rate": 8.358620689655174e-07, "loss": 3.3236, "step": 204 }, { "epoch": 0.7068965517241379, "grad_norm": 8.578219413757324, "learning_rate": 8.4e-07, "loss": 3.3135, "step": 205 }, { "epoch": 0.7103448275862069, "grad_norm": 4.091947555541992, "learning_rate": 8.441379310344828e-07, "loss": 3.2953, "step": 206 }, { "epoch": 0.7137931034482758, "grad_norm": 3.4423506259918213, "learning_rate": 8.482758620689655e-07, "loss": 3.2864, "step": 207 }, { "epoch": 0.7172413793103448, "grad_norm": 4.1074137687683105, "learning_rate": 8.524137931034484e-07, "loss": 3.2869, "step": 208 }, { "epoch": 0.7206896551724138, "grad_norm": 4.167697906494141, "learning_rate": 8.56551724137931e-07, "loss": 3.2711, "step": 209 }, { "epoch": 0.7241379310344828, "grad_norm": 6.487751007080078, "learning_rate": 8.606896551724138e-07, "loss": 3.269, "step": 210 }, { "epoch": 0.7275862068965517, "grad_norm": 5.834202289581299, "learning_rate": 8.648275862068966e-07, "loss": 3.2661, "step": 211 }, { "epoch": 0.7310344827586207, "grad_norm": 6.1789116859436035, "learning_rate": 8.689655172413794e-07, "loss": 3.2517, "step": 212 }, { "epoch": 0.7344827586206897, "grad_norm": 6.133596420288086, "learning_rate": 8.731034482758621e-07, "loss": 3.2437, "step": 213 }, { "epoch": 0.7379310344827587, "grad_norm": 5.8807373046875, "learning_rate": 8.772413793103448e-07, "loss": 3.2338, "step": 214 }, { "epoch": 0.7413793103448276, "grad_norm": 4.874159812927246, "learning_rate": 8.813793103448276e-07, "loss": 3.2414, "step": 215 }, { "epoch": 0.7448275862068966, "grad_norm": 3.5084476470947266, "learning_rate": 8.855172413793104e-07, "loss": 3.2213, "step": 216 }, { "epoch": 0.7482758620689656, "grad_norm": 3.3059144020080566, "learning_rate": 8.896551724137931e-07, "loss": 3.214, "step": 217 }, { "epoch": 0.7517241379310344, "grad_norm": 4.978625297546387, "learning_rate": 8.937931034482759e-07, "loss": 3.1848, "step": 218 }, { "epoch": 0.7551724137931034, "grad_norm": 4.905598163604736, "learning_rate": 8.979310344827586e-07, "loss": 3.1991, "step": 219 }, { "epoch": 0.7586206896551724, "grad_norm": 4.9894514083862305, "learning_rate": 9.020689655172414e-07, "loss": 3.1897, "step": 220 }, { "epoch": 0.7620689655172413, "grad_norm": 5.154286861419678, "learning_rate": 9.062068965517242e-07, "loss": 3.1937, "step": 221 }, { "epoch": 0.7655172413793103, "grad_norm": 4.563971519470215, "learning_rate": 9.103448275862069e-07, "loss": 3.1923, "step": 222 }, { "epoch": 0.7689655172413793, "grad_norm": 2.376009702682495, "learning_rate": 9.144827586206897e-07, "loss": 3.1843, "step": 223 }, { "epoch": 0.7724137931034483, "grad_norm": 5.031677722930908, "learning_rate": 9.186206896551724e-07, "loss": 3.1829, "step": 224 }, { "epoch": 0.7758620689655172, "grad_norm": 4.884829521179199, "learning_rate": 9.227586206896552e-07, "loss": 3.2428, "step": 225 }, { "epoch": 0.7793103448275862, "grad_norm": 20.68384552001953, "learning_rate": 9.268965517241379e-07, "loss": 3.2539, "step": 226 }, { "epoch": 0.7827586206896552, "grad_norm": 14.590863227844238, "learning_rate": 9.310344827586207e-07, "loss": 3.2158, "step": 227 }, { "epoch": 0.7862068965517242, "grad_norm": 7.548260688781738, "learning_rate": 9.351724137931035e-07, "loss": 3.1557, "step": 228 }, { "epoch": 0.7896551724137931, "grad_norm": 3.588167905807495, "learning_rate": 9.393103448275861e-07, "loss": 3.1521, "step": 229 }, { "epoch": 0.7931034482758621, "grad_norm": 2.12664794921875, "learning_rate": 9.434482758620689e-07, "loss": 3.1522, "step": 230 }, { "epoch": 0.7965517241379311, "grad_norm": 4.123807907104492, "learning_rate": 9.475862068965518e-07, "loss": 3.1464, "step": 231 }, { "epoch": 0.8, "grad_norm": 6.0648112297058105, "learning_rate": 9.517241379310346e-07, "loss": 3.1546, "step": 232 }, { "epoch": 0.803448275862069, "grad_norm": 8.961132049560547, "learning_rate": 9.558620689655173e-07, "loss": 3.1646, "step": 233 }, { "epoch": 0.8068965517241379, "grad_norm": 7.862599849700928, "learning_rate": 9.600000000000001e-07, "loss": 3.1441, "step": 234 }, { "epoch": 0.8103448275862069, "grad_norm": 5.842194080352783, "learning_rate": 9.641379310344828e-07, "loss": 3.1487, "step": 235 }, { "epoch": 0.8137931034482758, "grad_norm": 3.5894479751586914, "learning_rate": 9.682758620689654e-07, "loss": 3.1605, "step": 236 }, { "epoch": 0.8172413793103448, "grad_norm": 2.3379898071289062, "learning_rate": 9.724137931034483e-07, "loss": 3.1306, "step": 237 }, { "epoch": 0.8206896551724138, "grad_norm": 3.7239580154418945, "learning_rate": 9.76551724137931e-07, "loss": 3.1456, "step": 238 }, { "epoch": 0.8241379310344827, "grad_norm": 3.489032030105591, "learning_rate": 9.806896551724138e-07, "loss": 3.127, "step": 239 }, { "epoch": 0.8275862068965517, "grad_norm": 3.1880104541778564, "learning_rate": 9.848275862068967e-07, "loss": 3.1076, "step": 240 }, { "epoch": 0.8310344827586207, "grad_norm": 3.263913869857788, "learning_rate": 9.889655172413793e-07, "loss": 3.1112, "step": 241 }, { "epoch": 0.8344827586206897, "grad_norm": 3.0275516510009766, "learning_rate": 9.931034482758622e-07, "loss": 3.1201, "step": 242 }, { "epoch": 0.8379310344827586, "grad_norm": 1.6835054159164429, "learning_rate": 9.972413793103449e-07, "loss": 3.1356, "step": 243 }, { "epoch": 0.8413793103448276, "grad_norm": 2.9992339611053467, "learning_rate": 1.0013793103448275e-06, "loss": 3.109, "step": 244 }, { "epoch": 0.8448275862068966, "grad_norm": 3.3936071395874023, "learning_rate": 1.0055172413793104e-06, "loss": 3.1249, "step": 245 }, { "epoch": 0.8482758620689655, "grad_norm": 3.666984796524048, "learning_rate": 1.009655172413793e-06, "loss": 3.1294, "step": 246 }, { "epoch": 0.8517241379310345, "grad_norm": 2.6567742824554443, "learning_rate": 1.0137931034482759e-06, "loss": 3.1037, "step": 247 }, { "epoch": 0.8551724137931035, "grad_norm": 1.957770586013794, "learning_rate": 1.0179310344827588e-06, "loss": 3.1155, "step": 248 }, { "epoch": 0.8586206896551725, "grad_norm": 1.9777419567108154, "learning_rate": 1.0220689655172414e-06, "loss": 3.1115, "step": 249 }, { "epoch": 0.8620689655172413, "grad_norm": 3.978499174118042, "learning_rate": 1.0262068965517243e-06, "loss": 3.1287, "step": 250 }, { "epoch": 0.8655172413793103, "grad_norm": 30.433637619018555, "learning_rate": 1.030344827586207e-06, "loss": 3.2908, "step": 251 }, { "epoch": 0.8689655172413793, "grad_norm": 25.12896156311035, "learning_rate": 1.0344827586206896e-06, "loss": 3.2285, "step": 252 }, { "epoch": 0.8724137931034482, "grad_norm": 20.09853744506836, "learning_rate": 1.0386206896551724e-06, "loss": 3.1877, "step": 253 }, { "epoch": 0.8758620689655172, "grad_norm": 13.789295196533203, "learning_rate": 1.042758620689655e-06, "loss": 3.1186, "step": 254 }, { "epoch": 0.8793103448275862, "grad_norm": 11.735766410827637, "learning_rate": 1.046896551724138e-06, "loss": 3.1093, "step": 255 }, { "epoch": 0.8827586206896552, "grad_norm": 4.487181663513184, "learning_rate": 1.0510344827586208e-06, "loss": 3.0733, "step": 256 }, { "epoch": 0.8862068965517241, "grad_norm": 2.9625556468963623, "learning_rate": 1.0551724137931035e-06, "loss": 3.0723, "step": 257 }, { "epoch": 0.8896551724137931, "grad_norm": 6.132961273193359, "learning_rate": 1.0593103448275863e-06, "loss": 3.0787, "step": 258 }, { "epoch": 0.8931034482758621, "grad_norm": 6.996632099151611, "learning_rate": 1.063448275862069e-06, "loss": 3.0953, "step": 259 }, { "epoch": 0.896551724137931, "grad_norm": 6.6302595138549805, "learning_rate": 1.0675862068965517e-06, "loss": 3.1032, "step": 260 }, { "epoch": 0.9, "grad_norm": 6.580592155456543, "learning_rate": 1.0717241379310345e-06, "loss": 3.0712, "step": 261 }, { "epoch": 0.903448275862069, "grad_norm": 4.831645965576172, "learning_rate": 1.0758620689655172e-06, "loss": 3.0777, "step": 262 }, { "epoch": 0.906896551724138, "grad_norm": 2.384838819503784, "learning_rate": 1.08e-06, "loss": 3.084, "step": 263 }, { "epoch": 0.9103448275862069, "grad_norm": 3.1480040550231934, "learning_rate": 1.084137931034483e-06, "loss": 3.0814, "step": 264 }, { "epoch": 0.9137931034482759, "grad_norm": 3.892066240310669, "learning_rate": 1.0882758620689656e-06, "loss": 3.0769, "step": 265 }, { "epoch": 0.9172413793103448, "grad_norm": 3.9789860248565674, "learning_rate": 1.0924137931034484e-06, "loss": 3.0647, "step": 266 }, { "epoch": 0.9206896551724137, "grad_norm": 3.8676273822784424, "learning_rate": 1.096551724137931e-06, "loss": 3.0783, "step": 267 }, { "epoch": 0.9241379310344827, "grad_norm": 3.769120931625366, "learning_rate": 1.1006896551724137e-06, "loss": 3.089, "step": 268 }, { "epoch": 0.9275862068965517, "grad_norm": 1.8051167726516724, "learning_rate": 1.1048275862068966e-06, "loss": 3.0685, "step": 269 }, { "epoch": 0.9310344827586207, "grad_norm": 2.718231678009033, "learning_rate": 1.1089655172413792e-06, "loss": 3.0589, "step": 270 }, { "epoch": 0.9344827586206896, "grad_norm": 4.33424186706543, "learning_rate": 1.1131034482758621e-06, "loss": 3.0457, "step": 271 }, { "epoch": 0.9379310344827586, "grad_norm": 3.7457005977630615, "learning_rate": 1.1172413793103448e-06, "loss": 3.0608, "step": 272 }, { "epoch": 0.9413793103448276, "grad_norm": 3.7556073665618896, "learning_rate": 1.1213793103448276e-06, "loss": 3.0714, "step": 273 }, { "epoch": 0.9448275862068966, "grad_norm": 3.5508155822753906, "learning_rate": 1.1255172413793105e-06, "loss": 3.0846, "step": 274 }, { "epoch": 0.9482758620689655, "grad_norm": 2.5717408657073975, "learning_rate": 1.1296551724137931e-06, "loss": 3.0835, "step": 275 }, { "epoch": 0.9517241379310345, "grad_norm": 27.699665069580078, "learning_rate": 1.1337931034482758e-06, "loss": 3.2209, "step": 276 }, { "epoch": 0.9551724137931035, "grad_norm": 21.60763168334961, "learning_rate": 1.1379310344827587e-06, "loss": 3.1574, "step": 277 }, { "epoch": 0.9586206896551724, "grad_norm": 18.843616485595703, "learning_rate": 1.1420689655172413e-06, "loss": 3.136, "step": 278 }, { "epoch": 0.9620689655172414, "grad_norm": 15.63659381866455, "learning_rate": 1.1462068965517242e-06, "loss": 3.1199, "step": 279 }, { "epoch": 0.9655172413793104, "grad_norm": 12.793663024902344, "learning_rate": 1.1503448275862068e-06, "loss": 3.0783, "step": 280 }, { "epoch": 0.9689655172413794, "grad_norm": 8.753368377685547, "learning_rate": 1.1544827586206897e-06, "loss": 3.0689, "step": 281 }, { "epoch": 0.9724137931034482, "grad_norm": 3.6593589782714844, "learning_rate": 1.1586206896551726e-06, "loss": 3.0479, "step": 282 }, { "epoch": 0.9758620689655172, "grad_norm": 2.5186851024627686, "learning_rate": 1.1627586206896552e-06, "loss": 3.0494, "step": 283 }, { "epoch": 0.9793103448275862, "grad_norm": 6.015842914581299, "learning_rate": 1.1668965517241379e-06, "loss": 3.0627, "step": 284 }, { "epoch": 0.9827586206896551, "grad_norm": 7.416702747344971, "learning_rate": 1.1710344827586207e-06, "loss": 3.0597, "step": 285 }, { "epoch": 0.9862068965517241, "grad_norm": 7.812150001525879, "learning_rate": 1.1751724137931034e-06, "loss": 3.0498, "step": 286 }, { "epoch": 0.9896551724137931, "grad_norm": 7.538205623626709, "learning_rate": 1.1793103448275863e-06, "loss": 3.058, "step": 287 }, { "epoch": 0.993103448275862, "grad_norm": 5.2068376541137695, "learning_rate": 1.183448275862069e-06, "loss": 3.0644, "step": 288 }, { "epoch": 0.996551724137931, "grad_norm": 3.953817844390869, "learning_rate": 1.1875862068965518e-06, "loss": 3.036, "step": 289 }, { "epoch": 1.0, "grad_norm": 7.263755798339844, "learning_rate": 1.1917241379310346e-06, "loss": 3.076, "step": 290 }, { "epoch": 1.0034482758620689, "grad_norm": 22.29129981994629, "learning_rate": 1.1958620689655173e-06, "loss": 3.1368, "step": 291 }, { "epoch": 1.006896551724138, "grad_norm": 18.530858993530273, "learning_rate": 1.2000000000000002e-06, "loss": 3.1153, "step": 292 }, { "epoch": 1.0103448275862068, "grad_norm": 15.297325134277344, "learning_rate": 1.2041379310344828e-06, "loss": 3.0768, "step": 293 }, { "epoch": 1.013793103448276, "grad_norm": 12.156723022460938, "learning_rate": 1.2082758620689655e-06, "loss": 3.0666, "step": 294 }, { "epoch": 1.0172413793103448, "grad_norm": 8.951231956481934, "learning_rate": 1.2124137931034483e-06, "loss": 3.0421, "step": 295 }, { "epoch": 1.0206896551724138, "grad_norm": 2.762389898300171, "learning_rate": 1.216551724137931e-06, "loss": 3.0176, "step": 296 }, { "epoch": 1.0241379310344827, "grad_norm": 3.2061526775360107, "learning_rate": 1.2206896551724136e-06, "loss": 3.0259, "step": 297 }, { "epoch": 1.0275862068965518, "grad_norm": 8.895151138305664, "learning_rate": 1.2248275862068967e-06, "loss": 3.0337, "step": 298 }, { "epoch": 1.0310344827586206, "grad_norm": 6.40523624420166, "learning_rate": 1.2289655172413794e-06, "loss": 3.0372, "step": 299 }, { "epoch": 1.0344827586206897, "grad_norm": 7.349903106689453, "learning_rate": 1.2331034482758622e-06, "loss": 3.0474, "step": 300 }, { "epoch": 1.0379310344827586, "grad_norm": 7.132747173309326, "learning_rate": 1.2372413793103449e-06, "loss": 3.0492, "step": 301 }, { "epoch": 1.0413793103448277, "grad_norm": 5.449552536010742, "learning_rate": 1.2413793103448275e-06, "loss": 3.0269, "step": 302 }, { "epoch": 1.0448275862068965, "grad_norm": 2.2155964374542236, "learning_rate": 1.2455172413793104e-06, "loss": 3.0259, "step": 303 }, { "epoch": 1.0482758620689656, "grad_norm": 2.9261841773986816, "learning_rate": 1.249655172413793e-06, "loss": 3.0289, "step": 304 }, { "epoch": 1.0517241379310345, "grad_norm": 4.901970863342285, "learning_rate": 1.2537931034482757e-06, "loss": 3.026, "step": 305 }, { "epoch": 1.0551724137931036, "grad_norm": 5.194569110870361, "learning_rate": 1.2579310344827588e-06, "loss": 3.0221, "step": 306 }, { "epoch": 1.0586206896551724, "grad_norm": 4.217749118804932, "learning_rate": 1.2620689655172414e-06, "loss": 3.024, "step": 307 }, { "epoch": 1.0620689655172413, "grad_norm": 2.7514936923980713, "learning_rate": 1.2662068965517243e-06, "loss": 3.0285, "step": 308 }, { "epoch": 1.0655172413793104, "grad_norm": 1.772617220878601, "learning_rate": 1.270344827586207e-06, "loss": 3.0205, "step": 309 }, { "epoch": 1.0689655172413792, "grad_norm": 3.2481091022491455, "learning_rate": 1.2744827586206896e-06, "loss": 3.0089, "step": 310 }, { "epoch": 1.0724137931034483, "grad_norm": 2.5727698802948, "learning_rate": 1.2786206896551725e-06, "loss": 3.0055, "step": 311 }, { "epoch": 1.0758620689655172, "grad_norm": 2.749282121658325, "learning_rate": 1.2827586206896551e-06, "loss": 3.0304, "step": 312 }, { "epoch": 1.0793103448275863, "grad_norm": 3.278832197189331, "learning_rate": 1.2868965517241378e-06, "loss": 3.0201, "step": 313 }, { "epoch": 1.0827586206896551, "grad_norm": 2.3543202877044678, "learning_rate": 1.2910344827586209e-06, "loss": 2.9935, "step": 314 }, { "epoch": 1.0862068965517242, "grad_norm": 2.851282835006714, "learning_rate": 1.2951724137931035e-06, "loss": 3.0358, "step": 315 }, { "epoch": 1.089655172413793, "grad_norm": 13.132369995117188, "learning_rate": 1.2993103448275864e-06, "loss": 3.0459, "step": 316 }, { "epoch": 1.0931034482758621, "grad_norm": 9.661304473876953, "learning_rate": 1.303448275862069e-06, "loss": 3.0263, "step": 317 }, { "epoch": 1.096551724137931, "grad_norm": 7.224379062652588, "learning_rate": 1.3075862068965517e-06, "loss": 3.0207, "step": 318 }, { "epoch": 1.1, "grad_norm": 3.509124279022217, "learning_rate": 1.3117241379310345e-06, "loss": 2.9946, "step": 319 }, { "epoch": 1.103448275862069, "grad_norm": 2.785688877105713, "learning_rate": 1.3158620689655172e-06, "loss": 3.007, "step": 320 }, { "epoch": 1.106896551724138, "grad_norm": 5.433321475982666, "learning_rate": 1.3199999999999999e-06, "loss": 3.0039, "step": 321 }, { "epoch": 1.110344827586207, "grad_norm": 3.541982412338257, "learning_rate": 1.3241379310344827e-06, "loss": 2.999, "step": 322 }, { "epoch": 1.113793103448276, "grad_norm": 1.5318679809570312, "learning_rate": 1.3282758620689656e-06, "loss": 2.9906, "step": 323 }, { "epoch": 1.1172413793103448, "grad_norm": 5.295134544372559, "learning_rate": 1.3324137931034485e-06, "loss": 2.979, "step": 324 }, { "epoch": 1.1206896551724137, "grad_norm": 5.326470851898193, "learning_rate": 1.336551724137931e-06, "loss": 2.9991, "step": 325 }, { "epoch": 1.1241379310344828, "grad_norm": 5.663086414337158, "learning_rate": 1.3406896551724138e-06, "loss": 2.9916, "step": 326 }, { "epoch": 1.1275862068965516, "grad_norm": 4.363563537597656, "learning_rate": 1.3448275862068966e-06, "loss": 2.9831, "step": 327 }, { "epoch": 1.1310344827586207, "grad_norm": 1.6378021240234375, "learning_rate": 1.3489655172413793e-06, "loss": 2.9967, "step": 328 }, { "epoch": 1.1344827586206896, "grad_norm": 4.8045334815979, "learning_rate": 1.353103448275862e-06, "loss": 2.9743, "step": 329 }, { "epoch": 1.1379310344827587, "grad_norm": 5.800516128540039, "learning_rate": 1.3572413793103448e-06, "loss": 2.9788, "step": 330 }, { "epoch": 1.1413793103448275, "grad_norm": 5.498095512390137, "learning_rate": 1.3613793103448277e-06, "loss": 2.9908, "step": 331 }, { "epoch": 1.1448275862068966, "grad_norm": 4.2834930419921875, "learning_rate": 1.3655172413793105e-06, "loss": 2.9763, "step": 332 }, { "epoch": 1.1482758620689655, "grad_norm": 2.348924398422241, "learning_rate": 1.3696551724137932e-06, "loss": 2.9542, "step": 333 }, { "epoch": 1.1517241379310346, "grad_norm": 2.2376549243927, "learning_rate": 1.3737931034482758e-06, "loss": 2.9789, "step": 334 }, { "epoch": 1.1551724137931034, "grad_norm": 3.6376001834869385, "learning_rate": 1.3779310344827587e-06, "loss": 2.9834, "step": 335 }, { "epoch": 1.1586206896551725, "grad_norm": 2.9132235050201416, "learning_rate": 1.3820689655172413e-06, "loss": 2.9588, "step": 336 }, { "epoch": 1.1620689655172414, "grad_norm": 3.3057479858398438, "learning_rate": 1.3862068965517242e-06, "loss": 2.9798, "step": 337 }, { "epoch": 1.1655172413793102, "grad_norm": 2.182056188583374, "learning_rate": 1.3903448275862069e-06, "loss": 2.9674, "step": 338 }, { "epoch": 1.1689655172413793, "grad_norm": 4.655999183654785, "learning_rate": 1.3944827586206897e-06, "loss": 2.9677, "step": 339 }, { "epoch": 1.1724137931034484, "grad_norm": 4.718807220458984, "learning_rate": 1.3986206896551726e-06, "loss": 2.9887, "step": 340 }, { "epoch": 1.1758620689655173, "grad_norm": 7.0982842445373535, "learning_rate": 1.4027586206896553e-06, "loss": 2.9693, "step": 341 }, { "epoch": 1.1793103448275861, "grad_norm": 3.53790283203125, "learning_rate": 1.406896551724138e-06, "loss": 2.9403, "step": 342 }, { "epoch": 1.1827586206896552, "grad_norm": 2.101619005203247, "learning_rate": 1.4110344827586208e-06, "loss": 2.9208, "step": 343 }, { "epoch": 1.186206896551724, "grad_norm": 2.4116992950439453, "learning_rate": 1.4151724137931034e-06, "loss": 2.9296, "step": 344 }, { "epoch": 1.1896551724137931, "grad_norm": 3.4151358604431152, "learning_rate": 1.4193103448275863e-06, "loss": 2.9054, "step": 345 }, { "epoch": 1.193103448275862, "grad_norm": 1.7513326406478882, "learning_rate": 1.423448275862069e-06, "loss": 2.9154, "step": 346 }, { "epoch": 1.196551724137931, "grad_norm": 1.8829772472381592, "learning_rate": 1.4275862068965516e-06, "loss": 2.9169, "step": 347 }, { "epoch": 1.2, "grad_norm": 2.171987295150757, "learning_rate": 1.4317241379310347e-06, "loss": 2.9007, "step": 348 }, { "epoch": 1.203448275862069, "grad_norm": 2.317617893218994, "learning_rate": 1.4358620689655173e-06, "loss": 2.9126, "step": 349 }, { "epoch": 1.206896551724138, "grad_norm": 1.5728050470352173, "learning_rate": 1.44e-06, "loss": 2.8904, "step": 350 }, { "epoch": 1.210344827586207, "grad_norm": 3.5177464485168457, "learning_rate": 1.4441379310344828e-06, "loss": 2.8886, "step": 351 }, { "epoch": 1.2137931034482758, "grad_norm": 3.0158307552337646, "learning_rate": 1.4482758620689655e-06, "loss": 2.8759, "step": 352 }, { "epoch": 1.217241379310345, "grad_norm": 4.676311492919922, "learning_rate": 1.4524137931034484e-06, "loss": 2.887, "step": 353 }, { "epoch": 1.2206896551724138, "grad_norm": 1.994409441947937, "learning_rate": 1.456551724137931e-06, "loss": 2.8782, "step": 354 }, { "epoch": 1.2241379310344827, "grad_norm": 2.3709709644317627, "learning_rate": 1.4606896551724137e-06, "loss": 2.8891, "step": 355 }, { "epoch": 1.2275862068965517, "grad_norm": 3.7178452014923096, "learning_rate": 1.4648275862068967e-06, "loss": 2.8799, "step": 356 }, { "epoch": 1.2310344827586206, "grad_norm": 2.9732651710510254, "learning_rate": 1.4689655172413794e-06, "loss": 2.8753, "step": 357 }, { "epoch": 1.2344827586206897, "grad_norm": 2.154505968093872, "learning_rate": 1.473103448275862e-06, "loss": 2.8592, "step": 358 }, { "epoch": 1.2379310344827585, "grad_norm": 3.189167022705078, "learning_rate": 1.477241379310345e-06, "loss": 2.8732, "step": 359 }, { "epoch": 1.2413793103448276, "grad_norm": 2.3412435054779053, "learning_rate": 1.4813793103448276e-06, "loss": 2.8685, "step": 360 }, { "epoch": 1.2448275862068965, "grad_norm": 1.663102388381958, "learning_rate": 1.4855172413793104e-06, "loss": 2.8528, "step": 361 }, { "epoch": 1.2482758620689656, "grad_norm": 1.9362612962722778, "learning_rate": 1.489655172413793e-06, "loss": 2.8524, "step": 362 }, { "epoch": 1.2517241379310344, "grad_norm": 2.19787859916687, "learning_rate": 1.4937931034482757e-06, "loss": 2.8435, "step": 363 }, { "epoch": 1.2551724137931035, "grad_norm": 6.720412254333496, "learning_rate": 1.4979310344827588e-06, "loss": 2.8542, "step": 364 }, { "epoch": 1.2586206896551724, "grad_norm": 2.0270168781280518, "learning_rate": 1.5020689655172415e-06, "loss": 2.9076, "step": 365 }, { "epoch": 1.2620689655172415, "grad_norm": 10.852185249328613, "learning_rate": 1.5062068965517241e-06, "loss": 2.8692, "step": 366 }, { "epoch": 1.2655172413793103, "grad_norm": 6.358669757843018, "learning_rate": 1.510344827586207e-06, "loss": 2.8079, "step": 367 }, { "epoch": 1.2689655172413792, "grad_norm": 2.069445848464966, "learning_rate": 1.5144827586206896e-06, "loss": 2.7943, "step": 368 }, { "epoch": 1.2724137931034483, "grad_norm": 4.169285297393799, "learning_rate": 1.5186206896551725e-06, "loss": 2.7936, "step": 369 }, { "epoch": 1.2758620689655173, "grad_norm": 4.821836471557617, "learning_rate": 1.5227586206896552e-06, "loss": 2.7708, "step": 370 }, { "epoch": 1.2793103448275862, "grad_norm": 2.8127124309539795, "learning_rate": 1.5268965517241378e-06, "loss": 2.7879, "step": 371 }, { "epoch": 1.282758620689655, "grad_norm": 3.2504608631134033, "learning_rate": 1.5310344827586209e-06, "loss": 2.7818, "step": 372 }, { "epoch": 1.2862068965517242, "grad_norm": 2.8954129219055176, "learning_rate": 1.5351724137931035e-06, "loss": 2.7777, "step": 373 }, { "epoch": 1.2896551724137932, "grad_norm": 3.5698204040527344, "learning_rate": 1.5393103448275862e-06, "loss": 2.7754, "step": 374 }, { "epoch": 1.293103448275862, "grad_norm": 2.5645806789398193, "learning_rate": 1.543448275862069e-06, "loss": 2.757, "step": 375 }, { "epoch": 1.296551724137931, "grad_norm": 2.739527463912964, "learning_rate": 1.5475862068965517e-06, "loss": 2.742, "step": 376 }, { "epoch": 1.3, "grad_norm": 3.485123872756958, "learning_rate": 1.5517241379310346e-06, "loss": 2.7748, "step": 377 }, { "epoch": 1.303448275862069, "grad_norm": 2.5349795818328857, "learning_rate": 1.5558620689655172e-06, "loss": 2.7462, "step": 378 }, { "epoch": 1.306896551724138, "grad_norm": 2.419800281524658, "learning_rate": 1.5599999999999999e-06, "loss": 2.7309, "step": 379 }, { "epoch": 1.3103448275862069, "grad_norm": 1.9925339221954346, "learning_rate": 1.5641379310344828e-06, "loss": 2.7165, "step": 380 }, { "epoch": 1.313793103448276, "grad_norm": 3.6589596271514893, "learning_rate": 1.5682758620689656e-06, "loss": 2.7338, "step": 381 }, { "epoch": 1.3172413793103448, "grad_norm": 2.8275575637817383, "learning_rate": 1.5724137931034485e-06, "loss": 2.7192, "step": 382 }, { "epoch": 1.3206896551724139, "grad_norm": 3.301574230194092, "learning_rate": 1.5765517241379311e-06, "loss": 2.7086, "step": 383 }, { "epoch": 1.3241379310344827, "grad_norm": 4.8376593589782715, "learning_rate": 1.5806896551724138e-06, "loss": 2.7105, "step": 384 }, { "epoch": 1.3275862068965516, "grad_norm": 3.877119779586792, "learning_rate": 1.5848275862068967e-06, "loss": 2.7273, "step": 385 }, { "epoch": 1.3310344827586207, "grad_norm": 3.716526985168457, "learning_rate": 1.5889655172413793e-06, "loss": 2.7141, "step": 386 }, { "epoch": 1.3344827586206898, "grad_norm": 2.1246814727783203, "learning_rate": 1.593103448275862e-06, "loss": 2.7036, "step": 387 }, { "epoch": 1.3379310344827586, "grad_norm": 3.3160617351531982, "learning_rate": 1.5972413793103448e-06, "loss": 2.7161, "step": 388 }, { "epoch": 1.3413793103448275, "grad_norm": 2.8204426765441895, "learning_rate": 1.6013793103448277e-06, "loss": 2.7335, "step": 389 }, { "epoch": 1.3448275862068966, "grad_norm": 6.174525260925293, "learning_rate": 1.6055172413793106e-06, "loss": 2.7633, "step": 390 }, { "epoch": 1.3482758620689654, "grad_norm": 10.663763999938965, "learning_rate": 1.6096551724137932e-06, "loss": 2.7382, "step": 391 }, { "epoch": 1.3517241379310345, "grad_norm": 6.284732341766357, "learning_rate": 1.6137931034482759e-06, "loss": 2.6635, "step": 392 }, { "epoch": 1.3551724137931034, "grad_norm": 2.531792640686035, "learning_rate": 1.6179310344827587e-06, "loss": 2.6402, "step": 393 }, { "epoch": 1.3586206896551725, "grad_norm": 5.1249237060546875, "learning_rate": 1.6220689655172414e-06, "loss": 2.6402, "step": 394 }, { "epoch": 1.3620689655172413, "grad_norm": 7.294201374053955, "learning_rate": 1.626206896551724e-06, "loss": 2.635, "step": 395 }, { "epoch": 1.3655172413793104, "grad_norm": 6.90900993347168, "learning_rate": 1.630344827586207e-06, "loss": 2.646, "step": 396 }, { "epoch": 1.3689655172413793, "grad_norm": 4.580862045288086, "learning_rate": 1.6344827586206898e-06, "loss": 2.6017, "step": 397 }, { "epoch": 1.3724137931034484, "grad_norm": 1.5938448905944824, "learning_rate": 1.6386206896551726e-06, "loss": 2.6072, "step": 398 }, { "epoch": 1.3758620689655172, "grad_norm": 4.557816505432129, "learning_rate": 1.6427586206896553e-06, "loss": 2.6058, "step": 399 }, { "epoch": 1.3793103448275863, "grad_norm": 5.9808349609375, "learning_rate": 1.646896551724138e-06, "loss": 2.6089, "step": 400 }, { "epoch": 1.3827586206896552, "grad_norm": 7.703932762145996, "learning_rate": 1.6510344827586208e-06, "loss": 2.6007, "step": 401 }, { "epoch": 1.386206896551724, "grad_norm": 4.101101875305176, "learning_rate": 1.6551724137931035e-06, "loss": 2.5838, "step": 402 }, { "epoch": 1.389655172413793, "grad_norm": 3.5110793113708496, "learning_rate": 1.6593103448275861e-06, "loss": 2.5764, "step": 403 }, { "epoch": 1.3931034482758622, "grad_norm": 3.6313695907592773, "learning_rate": 1.663448275862069e-06, "loss": 2.5567, "step": 404 }, { "epoch": 1.396551724137931, "grad_norm": 4.915844917297363, "learning_rate": 1.6675862068965516e-06, "loss": 2.5571, "step": 405 }, { "epoch": 1.4, "grad_norm": 6.182191848754883, "learning_rate": 1.6717241379310347e-06, "loss": 2.5471, "step": 406 }, { "epoch": 1.403448275862069, "grad_norm": 3.818875551223755, "learning_rate": 1.6758620689655174e-06, "loss": 2.5569, "step": 407 }, { "epoch": 1.4068965517241379, "grad_norm": 1.9461861848831177, "learning_rate": 1.68e-06, "loss": 2.5307, "step": 408 }, { "epoch": 1.410344827586207, "grad_norm": 2.899069309234619, "learning_rate": 1.6841379310344829e-06, "loss": 2.5109, "step": 409 }, { "epoch": 1.4137931034482758, "grad_norm": 6.0998735427856445, "learning_rate": 1.6882758620689655e-06, "loss": 2.5209, "step": 410 }, { "epoch": 1.4172413793103449, "grad_norm": 5.01029109954834, "learning_rate": 1.6924137931034482e-06, "loss": 2.5186, "step": 411 }, { "epoch": 1.4206896551724137, "grad_norm": 6.17518424987793, "learning_rate": 1.696551724137931e-06, "loss": 2.4994, "step": 412 }, { "epoch": 1.4241379310344828, "grad_norm": 2.0568199157714844, "learning_rate": 1.7006896551724137e-06, "loss": 2.4975, "step": 413 }, { "epoch": 1.4275862068965517, "grad_norm": 2.8412842750549316, "learning_rate": 1.7048275862068968e-06, "loss": 2.5292, "step": 414 }, { "epoch": 1.4310344827586206, "grad_norm": 6.788323402404785, "learning_rate": 1.7089655172413794e-06, "loss": 2.5839, "step": 415 }, { "epoch": 1.4344827586206896, "grad_norm": 8.563782691955566, "learning_rate": 1.713103448275862e-06, "loss": 2.4879, "step": 416 }, { "epoch": 1.4379310344827587, "grad_norm": 5.264765739440918, "learning_rate": 1.717241379310345e-06, "loss": 2.4444, "step": 417 }, { "epoch": 1.4413793103448276, "grad_norm": 7.740565776824951, "learning_rate": 1.7213793103448276e-06, "loss": 2.4015, "step": 418 }, { "epoch": 1.4448275862068964, "grad_norm": 3.072421073913574, "learning_rate": 1.7255172413793103e-06, "loss": 2.3937, "step": 419 }, { "epoch": 1.4482758620689655, "grad_norm": 3.7309465408325195, "learning_rate": 1.7296551724137931e-06, "loss": 2.3765, "step": 420 }, { "epoch": 1.4517241379310346, "grad_norm": 2.5515785217285156, "learning_rate": 1.7337931034482758e-06, "loss": 2.376, "step": 421 }, { "epoch": 1.4551724137931035, "grad_norm": 3.339576482772827, "learning_rate": 1.7379310344827588e-06, "loss": 2.3545, "step": 422 }, { "epoch": 1.4586206896551723, "grad_norm": 4.1153082847595215, "learning_rate": 1.7420689655172415e-06, "loss": 2.3519, "step": 423 }, { "epoch": 1.4620689655172414, "grad_norm": 13.324134826660156, "learning_rate": 1.7462068965517242e-06, "loss": 2.3051, "step": 424 }, { "epoch": 1.4655172413793103, "grad_norm": 3.9902303218841553, "learning_rate": 1.750344827586207e-06, "loss": 2.3473, "step": 425 }, { "epoch": 1.4689655172413794, "grad_norm": 6.1618523597717285, "learning_rate": 1.7544827586206897e-06, "loss": 2.3014, "step": 426 }, { "epoch": 1.4724137931034482, "grad_norm": 4.495691299438477, "learning_rate": 1.7586206896551725e-06, "loss": 2.303, "step": 427 }, { "epoch": 1.4758620689655173, "grad_norm": 2.887627363204956, "learning_rate": 1.7627586206896552e-06, "loss": 2.3076, "step": 428 }, { "epoch": 1.4793103448275862, "grad_norm": 5.088809490203857, "learning_rate": 1.7668965517241378e-06, "loss": 2.2892, "step": 429 }, { "epoch": 1.4827586206896552, "grad_norm": 2.3927996158599854, "learning_rate": 1.7710344827586207e-06, "loss": 2.278, "step": 430 }, { "epoch": 1.486206896551724, "grad_norm": 4.085698127746582, "learning_rate": 1.7751724137931036e-06, "loss": 2.2608, "step": 431 }, { "epoch": 1.489655172413793, "grad_norm": 4.215930461883545, "learning_rate": 1.7793103448275862e-06, "loss": 2.2876, "step": 432 }, { "epoch": 1.493103448275862, "grad_norm": 4.286078453063965, "learning_rate": 1.783448275862069e-06, "loss": 2.2651, "step": 433 }, { "epoch": 1.4965517241379311, "grad_norm": 11.080854415893555, "learning_rate": 1.7875862068965517e-06, "loss": 2.2569, "step": 434 }, { "epoch": 1.5, "grad_norm": 3.2200074195861816, "learning_rate": 1.7917241379310346e-06, "loss": 2.24, "step": 435 }, { "epoch": 1.5034482758620689, "grad_norm": 5.641878604888916, "learning_rate": 1.7958620689655173e-06, "loss": 2.2449, "step": 436 }, { "epoch": 1.506896551724138, "grad_norm": 3.4213368892669678, "learning_rate": 1.8e-06, "loss": 2.1982, "step": 437 }, { "epoch": 1.510344827586207, "grad_norm": 2.599501609802246, "learning_rate": 1.8041379310344828e-06, "loss": 2.2244, "step": 438 }, { "epoch": 1.513793103448276, "grad_norm": 4.835009574890137, "learning_rate": 1.8082758620689656e-06, "loss": 2.2289, "step": 439 }, { "epoch": 1.5172413793103448, "grad_norm": 6.577223777770996, "learning_rate": 1.8124137931034483e-06, "loss": 2.283, "step": 440 }, { "epoch": 1.5206896551724138, "grad_norm": 9.540290832519531, "learning_rate": 1.8165517241379312e-06, "loss": 2.1468, "step": 441 }, { "epoch": 1.524137931034483, "grad_norm": 7.244075298309326, "learning_rate": 1.8206896551724138e-06, "loss": 2.1317, "step": 442 }, { "epoch": 1.5275862068965518, "grad_norm": 4.1710429191589355, "learning_rate": 1.8248275862068967e-06, "loss": 2.1066, "step": 443 }, { "epoch": 1.5310344827586206, "grad_norm": 22.28694725036621, "learning_rate": 1.8289655172413793e-06, "loss": 2.0938, "step": 444 }, { "epoch": 1.5344827586206895, "grad_norm": 6.053286075592041, "learning_rate": 1.833103448275862e-06, "loss": 2.0821, "step": 445 }, { "epoch": 1.5379310344827586, "grad_norm": 8.98820972442627, "learning_rate": 1.8372413793103449e-06, "loss": 2.0993, "step": 446 }, { "epoch": 1.5413793103448277, "grad_norm": 10.329743385314941, "learning_rate": 1.8413793103448277e-06, "loss": 2.0482, "step": 447 }, { "epoch": 1.5448275862068965, "grad_norm": 6.187282085418701, "learning_rate": 1.8455172413793104e-06, "loss": 2.0197, "step": 448 }, { "epoch": 1.5482758620689654, "grad_norm": 3.3709757328033447, "learning_rate": 1.8496551724137932e-06, "loss": 2.0172, "step": 449 }, { "epoch": 1.5517241379310345, "grad_norm": 3.658298969268799, "learning_rate": 1.8537931034482759e-06, "loss": 2.0092, "step": 450 }, { "epoch": 1.5551724137931036, "grad_norm": 4.059633731842041, "learning_rate": 1.8579310344827588e-06, "loss": 2.0426, "step": 451 }, { "epoch": 1.5586206896551724, "grad_norm": 3.143115520477295, "learning_rate": 1.8620689655172414e-06, "loss": 2.0418, "step": 452 }, { "epoch": 1.5620689655172413, "grad_norm": 3.5357449054718018, "learning_rate": 1.866206896551724e-06, "loss": 2.0, "step": 453 }, { "epoch": 1.5655172413793104, "grad_norm": 3.3469033241271973, "learning_rate": 1.870344827586207e-06, "loss": 1.9676, "step": 454 }, { "epoch": 1.5689655172413794, "grad_norm": 4.120237827301025, "learning_rate": 1.8744827586206896e-06, "loss": 2.0129, "step": 455 }, { "epoch": 1.5724137931034483, "grad_norm": 2.703338146209717, "learning_rate": 1.8786206896551722e-06, "loss": 1.9763, "step": 456 }, { "epoch": 1.5758620689655172, "grad_norm": 25.229570388793945, "learning_rate": 1.882758620689655e-06, "loss": 1.9481, "step": 457 }, { "epoch": 1.5793103448275863, "grad_norm": 5.69981575012207, "learning_rate": 1.8868965517241378e-06, "loss": 1.9393, "step": 458 }, { "epoch": 1.5827586206896553, "grad_norm": 3.9299731254577637, "learning_rate": 1.8910344827586206e-06, "loss": 1.9323, "step": 459 }, { "epoch": 1.5862068965517242, "grad_norm": 10.532322883605957, "learning_rate": 1.8951724137931037e-06, "loss": 1.9364, "step": 460 }, { "epoch": 1.589655172413793, "grad_norm": 3.107978343963623, "learning_rate": 1.8993103448275864e-06, "loss": 1.9312, "step": 461 }, { "epoch": 1.593103448275862, "grad_norm": 3.864436149597168, "learning_rate": 1.9034482758620692e-06, "loss": 1.908, "step": 462 }, { "epoch": 1.596551724137931, "grad_norm": 5.061004638671875, "learning_rate": 1.907586206896552e-06, "loss": 1.9083, "step": 463 }, { "epoch": 1.6, "grad_norm": 16.50563621520996, "learning_rate": 1.9117241379310345e-06, "loss": 1.9925, "step": 464 }, { "epoch": 1.603448275862069, "grad_norm": 6.571671485900879, "learning_rate": 1.9158620689655174e-06, "loss": 2.0592, "step": 465 }, { "epoch": 1.6068965517241378, "grad_norm": 5.826593399047852, "learning_rate": 1.9200000000000003e-06, "loss": 1.9036, "step": 466 }, { "epoch": 1.610344827586207, "grad_norm": 3.8580801486968994, "learning_rate": 1.9241379310344827e-06, "loss": 1.7953, "step": 467 }, { "epoch": 1.613793103448276, "grad_norm": 3.1883544921875, "learning_rate": 1.9282758620689656e-06, "loss": 1.8027, "step": 468 }, { "epoch": 1.6172413793103448, "grad_norm": 3.5665009021759033, "learning_rate": 1.9324137931034484e-06, "loss": 1.7617, "step": 469 }, { "epoch": 1.6206896551724137, "grad_norm": 7.621574401855469, "learning_rate": 1.936551724137931e-06, "loss": 1.7865, "step": 470 }, { "epoch": 1.6241379310344828, "grad_norm": 3.506699800491333, "learning_rate": 1.9406896551724137e-06, "loss": 1.7627, "step": 471 }, { "epoch": 1.6275862068965519, "grad_norm": 2.6892764568328857, "learning_rate": 1.9448275862068966e-06, "loss": 1.7753, "step": 472 }, { "epoch": 1.6310344827586207, "grad_norm": 6.0713276863098145, "learning_rate": 1.948965517241379e-06, "loss": 1.7623, "step": 473 }, { "epoch": 1.6344827586206896, "grad_norm": 2.984954833984375, "learning_rate": 1.953103448275862e-06, "loss": 1.7228, "step": 474 }, { "epoch": 1.6379310344827587, "grad_norm": 3.128737211227417, "learning_rate": 1.9572413793103448e-06, "loss": 1.7435, "step": 475 }, { "epoch": 1.6413793103448275, "grad_norm": 2.2811715602874756, "learning_rate": 1.9613793103448276e-06, "loss": 1.7606, "step": 476 }, { "epoch": 1.6448275862068966, "grad_norm": 3.34032940864563, "learning_rate": 1.9655172413793105e-06, "loss": 1.7539, "step": 477 }, { "epoch": 1.6482758620689655, "grad_norm": 3.4975569248199463, "learning_rate": 1.9696551724137934e-06, "loss": 1.7026, "step": 478 }, { "epoch": 1.6517241379310343, "grad_norm": 2.2683706283569336, "learning_rate": 1.9737931034482762e-06, "loss": 1.7248, "step": 479 }, { "epoch": 1.6551724137931034, "grad_norm": 3.5305676460266113, "learning_rate": 1.9779310344827587e-06, "loss": 1.7027, "step": 480 }, { "epoch": 1.6586206896551725, "grad_norm": 16.018512725830078, "learning_rate": 1.9820689655172415e-06, "loss": 1.709, "step": 481 }, { "epoch": 1.6620689655172414, "grad_norm": 2.5114991664886475, "learning_rate": 1.9862068965517244e-06, "loss": 1.6821, "step": 482 }, { "epoch": 1.6655172413793102, "grad_norm": 5.493526458740234, "learning_rate": 1.990344827586207e-06, "loss": 1.6594, "step": 483 }, { "epoch": 1.6689655172413793, "grad_norm": 2.997056722640991, "learning_rate": 1.9944827586206897e-06, "loss": 1.7095, "step": 484 }, { "epoch": 1.6724137931034484, "grad_norm": 3.5512354373931885, "learning_rate": 1.9986206896551726e-06, "loss": 1.6644, "step": 485 }, { "epoch": 1.6758620689655173, "grad_norm": 4.0412187576293945, "learning_rate": 2.002758620689655e-06, "loss": 1.66, "step": 486 }, { "epoch": 1.6793103448275861, "grad_norm": 6.003110408782959, "learning_rate": 2.006896551724138e-06, "loss": 1.6266, "step": 487 }, { "epoch": 1.6827586206896552, "grad_norm": 4.315246105194092, "learning_rate": 2.0110344827586207e-06, "loss": 1.6736, "step": 488 }, { "epoch": 1.6862068965517243, "grad_norm": 6.079290390014648, "learning_rate": 2.015172413793103e-06, "loss": 1.6768, "step": 489 }, { "epoch": 1.6896551724137931, "grad_norm": 15.033203125, "learning_rate": 2.019310344827586e-06, "loss": 1.7877, "step": 490 }, { "epoch": 1.693103448275862, "grad_norm": 6.281724452972412, "learning_rate": 2.023448275862069e-06, "loss": 1.6247, "step": 491 }, { "epoch": 1.6965517241379309, "grad_norm": 4.834966659545898, "learning_rate": 2.0275862068965518e-06, "loss": 1.5571, "step": 492 }, { "epoch": 1.7, "grad_norm": 2.9997949600219727, "learning_rate": 2.0317241379310346e-06, "loss": 1.5035, "step": 493 }, { "epoch": 1.703448275862069, "grad_norm": 2.898606300354004, "learning_rate": 2.0358620689655175e-06, "loss": 1.5195, "step": 494 }, { "epoch": 1.706896551724138, "grad_norm": 5.896825313568115, "learning_rate": 2.0400000000000004e-06, "loss": 1.5356, "step": 495 }, { "epoch": 1.7103448275862068, "grad_norm": 4.864215850830078, "learning_rate": 2.044137931034483e-06, "loss": 1.4672, "step": 496 }, { "epoch": 1.7137931034482758, "grad_norm": 3.1615781784057617, "learning_rate": 2.0482758620689657e-06, "loss": 1.5042, "step": 497 }, { "epoch": 1.717241379310345, "grad_norm": 2.9907054901123047, "learning_rate": 2.0524137931034485e-06, "loss": 1.5244, "step": 498 }, { "epoch": 1.7206896551724138, "grad_norm": 3.6789655685424805, "learning_rate": 2.056551724137931e-06, "loss": 1.4761, "step": 499 }, { "epoch": 1.7241379310344827, "grad_norm": 4.085512161254883, "learning_rate": 2.060689655172414e-06, "loss": 1.4934, "step": 500 }, { "epoch": 1.7275862068965517, "grad_norm": 2.124560594558716, "learning_rate": 2.0648275862068967e-06, "loss": 1.4605, "step": 501 }, { "epoch": 1.7310344827586208, "grad_norm": 4.5591301918029785, "learning_rate": 2.068965517241379e-06, "loss": 1.4798, "step": 502 }, { "epoch": 1.7344827586206897, "grad_norm": 5.589193820953369, "learning_rate": 2.073103448275862e-06, "loss": 1.4879, "step": 503 }, { "epoch": 1.7379310344827585, "grad_norm": 3.2566239833831787, "learning_rate": 2.077241379310345e-06, "loss": 1.5379, "step": 504 }, { "epoch": 1.7413793103448276, "grad_norm": 6.895675182342529, "learning_rate": 2.0813793103448273e-06, "loss": 1.4652, "step": 505 }, { "epoch": 1.7448275862068967, "grad_norm": 3.057904005050659, "learning_rate": 2.08551724137931e-06, "loss": 1.4662, "step": 506 }, { "epoch": 1.7482758620689656, "grad_norm": 3.4798085689544678, "learning_rate": 2.089655172413793e-06, "loss": 1.4471, "step": 507 }, { "epoch": 1.7517241379310344, "grad_norm": 2.594557285308838, "learning_rate": 2.093793103448276e-06, "loss": 1.4664, "step": 508 }, { "epoch": 1.7551724137931033, "grad_norm": 4.179352283477783, "learning_rate": 2.0979310344827584e-06, "loss": 1.4965, "step": 509 }, { "epoch": 1.7586206896551724, "grad_norm": 4.462242603302002, "learning_rate": 2.1020689655172417e-06, "loss": 1.4651, "step": 510 }, { "epoch": 1.7620689655172415, "grad_norm": 2.975325107574463, "learning_rate": 2.1062068965517245e-06, "loss": 1.4942, "step": 511 }, { "epoch": 1.7655172413793103, "grad_norm": 2.477570056915283, "learning_rate": 2.110344827586207e-06, "loss": 1.4572, "step": 512 }, { "epoch": 1.7689655172413792, "grad_norm": 2.934225559234619, "learning_rate": 2.11448275862069e-06, "loss": 1.4679, "step": 513 }, { "epoch": 1.7724137931034483, "grad_norm": 3.81902813911438, "learning_rate": 2.1186206896551727e-06, "loss": 1.4968, "step": 514 }, { "epoch": 1.7758620689655173, "grad_norm": 5.400010585784912, "learning_rate": 2.122758620689655e-06, "loss": 1.667, "step": 515 }, { "epoch": 1.7793103448275862, "grad_norm": 4.931059837341309, "learning_rate": 2.126896551724138e-06, "loss": 1.4026, "step": 516 }, { "epoch": 1.782758620689655, "grad_norm": 3.708153009414673, "learning_rate": 2.131034482758621e-06, "loss": 1.3569, "step": 517 }, { "epoch": 1.7862068965517242, "grad_norm": 5.100454807281494, "learning_rate": 2.1351724137931033e-06, "loss": 1.3361, "step": 518 }, { "epoch": 1.7896551724137932, "grad_norm": 4.524672031402588, "learning_rate": 2.139310344827586e-06, "loss": 1.339, "step": 519 }, { "epoch": 1.793103448275862, "grad_norm": 9.821247100830078, "learning_rate": 2.143448275862069e-06, "loss": 1.3422, "step": 520 }, { "epoch": 1.796551724137931, "grad_norm": 2.734733819961548, "learning_rate": 2.147586206896552e-06, "loss": 1.3628, "step": 521 }, { "epoch": 1.8, "grad_norm": 4.70095157623291, "learning_rate": 2.1517241379310343e-06, "loss": 1.3302, "step": 522 }, { "epoch": 1.8034482758620691, "grad_norm": 4.899191856384277, "learning_rate": 2.155862068965517e-06, "loss": 1.2925, "step": 523 }, { "epoch": 1.806896551724138, "grad_norm": 3.8109679222106934, "learning_rate": 2.16e-06, "loss": 1.3496, "step": 524 }, { "epoch": 1.8103448275862069, "grad_norm": 3.8673768043518066, "learning_rate": 2.1641379310344825e-06, "loss": 1.3078, "step": 525 }, { "epoch": 1.8137931034482757, "grad_norm": 3.338390827178955, "learning_rate": 2.168275862068966e-06, "loss": 1.3256, "step": 526 }, { "epoch": 1.8172413793103448, "grad_norm": 3.1568045616149902, "learning_rate": 2.1724137931034487e-06, "loss": 1.3366, "step": 527 }, { "epoch": 1.8206896551724139, "grad_norm": 2.0985710620880127, "learning_rate": 2.176551724137931e-06, "loss": 1.2574, "step": 528 }, { "epoch": 1.8241379310344827, "grad_norm": 3.5250725746154785, "learning_rate": 2.180689655172414e-06, "loss": 1.3369, "step": 529 }, { "epoch": 1.8275862068965516, "grad_norm": 3.2137832641601562, "learning_rate": 2.184827586206897e-06, "loss": 1.362, "step": 530 }, { "epoch": 1.8310344827586207, "grad_norm": 3.0406529903411865, "learning_rate": 2.1889655172413793e-06, "loss": 1.2959, "step": 531 }, { "epoch": 1.8344827586206898, "grad_norm": 5.975280284881592, "learning_rate": 2.193103448275862e-06, "loss": 1.2853, "step": 532 }, { "epoch": 1.8379310344827586, "grad_norm": 18.010412216186523, "learning_rate": 2.197241379310345e-06, "loss": 1.3266, "step": 533 }, { "epoch": 1.8413793103448275, "grad_norm": 10.504039764404297, "learning_rate": 2.2013793103448275e-06, "loss": 1.323, "step": 534 }, { "epoch": 1.8448275862068966, "grad_norm": 4.035922050476074, "learning_rate": 2.2055172413793103e-06, "loss": 1.367, "step": 535 }, { "epoch": 1.8482758620689657, "grad_norm": 5.319116115570068, "learning_rate": 2.209655172413793e-06, "loss": 1.3489, "step": 536 }, { "epoch": 1.8517241379310345, "grad_norm": 5.499382495880127, "learning_rate": 2.213793103448276e-06, "loss": 1.3192, "step": 537 }, { "epoch": 1.8551724137931034, "grad_norm": 4.161999225616455, "learning_rate": 2.2179310344827585e-06, "loss": 1.4011, "step": 538 }, { "epoch": 1.8586206896551725, "grad_norm": 2.590684413909912, "learning_rate": 2.2220689655172414e-06, "loss": 1.3701, "step": 539 }, { "epoch": 1.8620689655172413, "grad_norm": 6.575713157653809, "learning_rate": 2.2262068965517242e-06, "loss": 1.5752, "step": 540 }, { "epoch": 1.8655172413793104, "grad_norm": 4.382113933563232, "learning_rate": 2.2303448275862067e-06, "loss": 1.2592, "step": 541 }, { "epoch": 1.8689655172413793, "grad_norm": 2.413707733154297, "learning_rate": 2.2344827586206895e-06, "loss": 1.1858, "step": 542 }, { "epoch": 1.8724137931034481, "grad_norm": 2.238353967666626, "learning_rate": 2.238620689655173e-06, "loss": 1.2491, "step": 543 }, { "epoch": 1.8758620689655172, "grad_norm": 5.579054832458496, "learning_rate": 2.2427586206896553e-06, "loss": 1.1606, "step": 544 }, { "epoch": 1.8793103448275863, "grad_norm": 3.630858898162842, "learning_rate": 2.246896551724138e-06, "loss": 1.2251, "step": 545 }, { "epoch": 1.8827586206896552, "grad_norm": 3.9410512447357178, "learning_rate": 2.251034482758621e-06, "loss": 1.1893, "step": 546 }, { "epoch": 1.886206896551724, "grad_norm": 3.135802984237671, "learning_rate": 2.2551724137931034e-06, "loss": 1.2181, "step": 547 }, { "epoch": 1.889655172413793, "grad_norm": NaN, "learning_rate": 2.2551724137931034e-06, "loss": 1.2174, "step": 548 }, { "epoch": 1.8931034482758622, "grad_norm": 2.6863536834716797, "learning_rate": 2.2593103448275863e-06, "loss": 1.2086, "step": 549 }, { "epoch": 1.896551724137931, "grad_norm": 3.4757511615753174, "learning_rate": 2.263448275862069e-06, "loss": 1.2074, "step": 550 }, { "epoch": 1.9, "grad_norm": 3.62119460105896, "learning_rate": 2.2675862068965516e-06, "loss": 1.1891, "step": 551 }, { "epoch": 1.903448275862069, "grad_norm": 2.7560508251190186, "learning_rate": 2.2717241379310345e-06, "loss": 1.2199, "step": 552 }, { "epoch": 1.906896551724138, "grad_norm": 3.376249313354492, "learning_rate": 2.2758620689655173e-06, "loss": 1.2445, "step": 553 }, { "epoch": 1.910344827586207, "grad_norm": 3.411372661590576, "learning_rate": 2.28e-06, "loss": 1.2232, "step": 554 }, { "epoch": 1.9137931034482758, "grad_norm": 3.621738910675049, "learning_rate": 2.2841379310344826e-06, "loss": 1.2526, "step": 555 }, { "epoch": 1.9172413793103447, "grad_norm": 3.448282241821289, "learning_rate": 2.2882758620689655e-06, "loss": 1.2076, "step": 556 }, { "epoch": 1.9206896551724137, "grad_norm": 3.020202875137329, "learning_rate": 2.2924137931034484e-06, "loss": 1.2356, "step": 557 }, { "epoch": 1.9241379310344828, "grad_norm": 2.3426244258880615, "learning_rate": 2.296551724137931e-06, "loss": 1.2413, "step": 558 }, { "epoch": 1.9275862068965517, "grad_norm": 5.525071144104004, "learning_rate": 2.3006896551724137e-06, "loss": 1.2154, "step": 559 }, { "epoch": 1.9310344827586206, "grad_norm": 2.4195799827575684, "learning_rate": 2.3048275862068965e-06, "loss": 1.2202, "step": 560 }, { "epoch": 1.9344827586206896, "grad_norm": 3.6405367851257324, "learning_rate": 2.3089655172413794e-06, "loss": 1.2768, "step": 561 }, { "epoch": 1.9379310344827587, "grad_norm": 3.2947797775268555, "learning_rate": 2.3131034482758623e-06, "loss": 1.2409, "step": 562 }, { "epoch": 1.9413793103448276, "grad_norm": 3.413235902786255, "learning_rate": 2.317241379310345e-06, "loss": 1.2389, "step": 563 }, { "epoch": 1.9448275862068964, "grad_norm": 2.886854648590088, "learning_rate": 2.3213793103448276e-06, "loss": 1.3344, "step": 564 }, { "epoch": 1.9482758620689655, "grad_norm": 6.9042887687683105, "learning_rate": 2.3255172413793104e-06, "loss": 1.444, "step": 565 }, { "epoch": 1.9517241379310346, "grad_norm": 3.2939679622650146, "learning_rate": 2.3296551724137933e-06, "loss": 1.1446, "step": 566 }, { "epoch": 1.9551724137931035, "grad_norm": 3.6916043758392334, "learning_rate": 2.3337931034482757e-06, "loss": 1.1478, "step": 567 }, { "epoch": 1.9586206896551723, "grad_norm": 2.6276895999908447, "learning_rate": 2.3379310344827586e-06, "loss": 1.1278, "step": 568 }, { "epoch": 1.9620689655172414, "grad_norm": 2.9521961212158203, "learning_rate": 2.3420689655172415e-06, "loss": 1.1182, "step": 569 }, { "epoch": 1.9655172413793105, "grad_norm": 3.170592784881592, "learning_rate": 2.3462068965517243e-06, "loss": 1.1107, "step": 570 }, { "epoch": 1.9689655172413794, "grad_norm": 4.079660892486572, "learning_rate": 2.3503448275862068e-06, "loss": 1.1093, "step": 571 }, { "epoch": 1.9724137931034482, "grad_norm": 7.9969258308410645, "learning_rate": 2.3544827586206896e-06, "loss": 1.157, "step": 572 }, { "epoch": 1.975862068965517, "grad_norm": 4.274134635925293, "learning_rate": 2.3586206896551725e-06, "loss": 1.1282, "step": 573 }, { "epoch": 1.9793103448275862, "grad_norm": 4.635529041290283, "learning_rate": 2.362758620689655e-06, "loss": 1.2047, "step": 574 }, { "epoch": 1.9827586206896552, "grad_norm": 4.103023052215576, "learning_rate": 2.366896551724138e-06, "loss": 1.156, "step": 575 }, { "epoch": 1.986206896551724, "grad_norm": 2.307750701904297, "learning_rate": 2.3710344827586207e-06, "loss": 1.19, "step": 576 }, { "epoch": 1.989655172413793, "grad_norm": 2.7361559867858887, "learning_rate": 2.3751724137931035e-06, "loss": 1.1568, "step": 577 }, { "epoch": 1.993103448275862, "grad_norm": 3.385753631591797, "learning_rate": 2.3793103448275864e-06, "loss": 1.2232, "step": 578 }, { "epoch": 1.9965517241379311, "grad_norm": 6.525738716125488, "learning_rate": 2.3834482758620693e-06, "loss": 1.2402, "step": 579 }, { "epoch": 2.0, "grad_norm": 6.702201843261719, "learning_rate": 2.3875862068965517e-06, "loss": 1.3958, "step": 580 }, { "epoch": 2.003448275862069, "grad_norm": 5.5869340896606445, "learning_rate": 2.3917241379310346e-06, "loss": 1.1348, "step": 581 }, { "epoch": 2.0068965517241377, "grad_norm": 3.1165096759796143, "learning_rate": 2.3958620689655174e-06, "loss": 1.0893, "step": 582 }, { "epoch": 2.010344827586207, "grad_norm": 2.588050603866577, "learning_rate": 2.4000000000000003e-06, "loss": 1.0631, "step": 583 }, { "epoch": 2.013793103448276, "grad_norm": 2.40844988822937, "learning_rate": 2.4041379310344828e-06, "loss": 1.0562, "step": 584 }, { "epoch": 2.0172413793103448, "grad_norm": 2.0949089527130127, "learning_rate": 2.4082758620689656e-06, "loss": 1.0506, "step": 585 }, { "epoch": 2.0206896551724136, "grad_norm": 4.22530460357666, "learning_rate": 2.4124137931034485e-06, "loss": 1.0866, "step": 586 }, { "epoch": 2.024137931034483, "grad_norm": 4.263517379760742, "learning_rate": 2.416551724137931e-06, "loss": 1.0986, "step": 587 }, { "epoch": 2.027586206896552, "grad_norm": 8.435966491699219, "learning_rate": 2.420689655172414e-06, "loss": 1.0612, "step": 588 }, { "epoch": 2.0310344827586206, "grad_norm": 2.312739372253418, "learning_rate": 2.4248275862068967e-06, "loss": 1.0987, "step": 589 }, { "epoch": 2.0344827586206895, "grad_norm": 3.8369486331939697, "learning_rate": 2.428965517241379e-06, "loss": 1.0493, "step": 590 }, { "epoch": 2.037931034482759, "grad_norm": 3.420102119445801, "learning_rate": 2.433103448275862e-06, "loss": 1.0716, "step": 591 }, { "epoch": 2.0413793103448277, "grad_norm": 2.3202216625213623, "learning_rate": 2.437241379310345e-06, "loss": 1.0682, "step": 592 }, { "epoch": 2.0448275862068965, "grad_norm": 2.2011618614196777, "learning_rate": 2.4413793103448273e-06, "loss": 1.1093, "step": 593 }, { "epoch": 2.0482758620689654, "grad_norm": 2.436129093170166, "learning_rate": 2.4455172413793106e-06, "loss": 1.09, "step": 594 }, { "epoch": 2.0517241379310347, "grad_norm": 2.539137363433838, "learning_rate": 2.4496551724137934e-06, "loss": 1.102, "step": 595 }, { "epoch": 2.0551724137931036, "grad_norm": 2.233057737350464, "learning_rate": 2.453793103448276e-06, "loss": 1.0475, "step": 596 }, { "epoch": 2.0586206896551724, "grad_norm": 5.652006149291992, "learning_rate": 2.4579310344827587e-06, "loss": 1.0721, "step": 597 }, { "epoch": 2.0620689655172413, "grad_norm": 3.6274197101593018, "learning_rate": 2.4620689655172416e-06, "loss": 1.0832, "step": 598 }, { "epoch": 2.06551724137931, "grad_norm": 6.065937042236328, "learning_rate": 2.4662068965517245e-06, "loss": 1.1237, "step": 599 }, { "epoch": 2.0689655172413794, "grad_norm": 5.782344818115234, "learning_rate": 2.470344827586207e-06, "loss": 1.0718, "step": 600 }, { "epoch": 2.0724137931034483, "grad_norm": 3.931534767150879, "learning_rate": 2.4744827586206898e-06, "loss": 1.1381, "step": 601 }, { "epoch": 2.075862068965517, "grad_norm": 4.547845363616943, "learning_rate": 2.4786206896551726e-06, "loss": 1.1079, "step": 602 }, { "epoch": 2.079310344827586, "grad_norm": 3.474576950073242, "learning_rate": 2.482758620689655e-06, "loss": 1.143, "step": 603 }, { "epoch": 2.0827586206896553, "grad_norm": 2.6151013374328613, "learning_rate": 2.486896551724138e-06, "loss": 1.2219, "step": 604 }, { "epoch": 2.086206896551724, "grad_norm": 11.680089950561523, "learning_rate": 2.491034482758621e-06, "loss": 1.3098, "step": 605 }, { "epoch": 2.089655172413793, "grad_norm": 7.607903480529785, "learning_rate": 2.4951724137931032e-06, "loss": 1.0653, "step": 606 }, { "epoch": 2.093103448275862, "grad_norm": 2.5806760787963867, "learning_rate": 2.499310344827586e-06, "loss": 0.985, "step": 607 }, { "epoch": 2.0965517241379312, "grad_norm": 2.8834455013275146, "learning_rate": 2.503448275862069e-06, "loss": 1.0363, "step": 608 }, { "epoch": 2.1, "grad_norm": 1.8135639429092407, "learning_rate": 2.5075862068965514e-06, "loss": 0.9685, "step": 609 }, { "epoch": 2.103448275862069, "grad_norm": 3.072044610977173, "learning_rate": 2.5117241379310347e-06, "loss": 0.966, "step": 610 }, { "epoch": 2.106896551724138, "grad_norm": 3.9685218334198, "learning_rate": 2.5158620689655176e-06, "loss": 1.0156, "step": 611 }, { "epoch": 2.110344827586207, "grad_norm": 1.914488434791565, "learning_rate": 2.52e-06, "loss": 1.0341, "step": 612 }, { "epoch": 2.113793103448276, "grad_norm": 2.2604146003723145, "learning_rate": 2.524137931034483e-06, "loss": 1.0336, "step": 613 }, { "epoch": 2.117241379310345, "grad_norm": 1.7574996948242188, "learning_rate": 2.5282758620689657e-06, "loss": 1.0322, "step": 614 }, { "epoch": 2.1206896551724137, "grad_norm": 4.6853437423706055, "learning_rate": 2.5324137931034486e-06, "loss": 0.985, "step": 615 }, { "epoch": 2.1241379310344826, "grad_norm": 1.9071950912475586, "learning_rate": 2.536551724137931e-06, "loss": 1.0059, "step": 616 }, { "epoch": 2.127586206896552, "grad_norm": 2.081974506378174, "learning_rate": 2.540689655172414e-06, "loss": 1.0497, "step": 617 }, { "epoch": 2.1310344827586207, "grad_norm": 3.0604193210601807, "learning_rate": 2.5448275862068968e-06, "loss": 1.049, "step": 618 }, { "epoch": 2.1344827586206896, "grad_norm": 3.9624743461608887, "learning_rate": 2.5489655172413792e-06, "loss": 1.0029, "step": 619 }, { "epoch": 2.1379310344827585, "grad_norm": 2.3073689937591553, "learning_rate": 2.553103448275862e-06, "loss": 1.0524, "step": 620 }, { "epoch": 2.1413793103448278, "grad_norm": 2.822221040725708, "learning_rate": 2.557241379310345e-06, "loss": 1.0619, "step": 621 }, { "epoch": 2.1448275862068966, "grad_norm": 5.773874759674072, "learning_rate": 2.5613793103448274e-06, "loss": 1.0233, "step": 622 }, { "epoch": 2.1482758620689655, "grad_norm": 2.497006416320801, "learning_rate": 2.5655172413793103e-06, "loss": 1.0396, "step": 623 }, { "epoch": 2.1517241379310343, "grad_norm": 2.4922759532928467, "learning_rate": 2.569655172413793e-06, "loss": 1.0515, "step": 624 }, { "epoch": 2.1551724137931036, "grad_norm": 3.1011834144592285, "learning_rate": 2.5737931034482756e-06, "loss": 1.0587, "step": 625 }, { "epoch": 2.1586206896551725, "grad_norm": 4.544788837432861, "learning_rate": 2.5779310344827584e-06, "loss": 1.0878, "step": 626 }, { "epoch": 2.1620689655172414, "grad_norm": 2.7421391010284424, "learning_rate": 2.5820689655172417e-06, "loss": 1.1077, "step": 627 }, { "epoch": 2.1655172413793102, "grad_norm": 2.4918582439422607, "learning_rate": 2.5862068965517246e-06, "loss": 1.1019, "step": 628 }, { "epoch": 2.1689655172413795, "grad_norm": 5.459743976593018, "learning_rate": 2.590344827586207e-06, "loss": 1.1439, "step": 629 }, { "epoch": 2.1724137931034484, "grad_norm": 11.345252990722656, "learning_rate": 2.59448275862069e-06, "loss": 1.4158, "step": 630 }, { "epoch": 2.1758620689655173, "grad_norm": 2.8170950412750244, "learning_rate": 2.5986206896551728e-06, "loss": 0.9833, "step": 631 }, { "epoch": 2.179310344827586, "grad_norm": 2.0338289737701416, "learning_rate": 2.602758620689655e-06, "loss": 0.9522, "step": 632 }, { "epoch": 2.182758620689655, "grad_norm": 1.6466248035430908, "learning_rate": 2.606896551724138e-06, "loss": 0.9819, "step": 633 }, { "epoch": 2.1862068965517243, "grad_norm": 1.7877452373504639, "learning_rate": 2.611034482758621e-06, "loss": 0.9967, "step": 634 }, { "epoch": 2.189655172413793, "grad_norm": 3.15472412109375, "learning_rate": 2.6151724137931034e-06, "loss": 1.0078, "step": 635 }, { "epoch": 2.193103448275862, "grad_norm": 1.6804815530776978, "learning_rate": 2.6193103448275862e-06, "loss": 0.9314, "step": 636 }, { "epoch": 2.196551724137931, "grad_norm": 1.8934149742126465, "learning_rate": 2.623448275862069e-06, "loss": 0.9621, "step": 637 }, { "epoch": 2.2, "grad_norm": 10.101994514465332, "learning_rate": 2.6275862068965515e-06, "loss": 0.9742, "step": 638 }, { "epoch": 2.203448275862069, "grad_norm": 2.244854688644409, "learning_rate": 2.6317241379310344e-06, "loss": 0.9415, "step": 639 }, { "epoch": 2.206896551724138, "grad_norm": 1.9139894247055054, "learning_rate": 2.6358620689655173e-06, "loss": 0.9657, "step": 640 }, { "epoch": 2.2103448275862068, "grad_norm": 2.0368237495422363, "learning_rate": 2.6399999999999997e-06, "loss": 0.9975, "step": 641 }, { "epoch": 2.213793103448276, "grad_norm": 2.3111484050750732, "learning_rate": 2.6441379310344826e-06, "loss": 0.9934, "step": 642 }, { "epoch": 2.217241379310345, "grad_norm": 2.0525102615356445, "learning_rate": 2.6482758620689654e-06, "loss": 0.9349, "step": 643 }, { "epoch": 2.220689655172414, "grad_norm": 1.9655874967575073, "learning_rate": 2.6524137931034487e-06, "loss": 0.9559, "step": 644 }, { "epoch": 2.2241379310344827, "grad_norm": 2.294050693511963, "learning_rate": 2.656551724137931e-06, "loss": 0.9858, "step": 645 }, { "epoch": 2.227586206896552, "grad_norm": 6.511058330535889, "learning_rate": 2.660689655172414e-06, "loss": 0.9962, "step": 646 }, { "epoch": 2.231034482758621, "grad_norm": 2.875495672225952, "learning_rate": 2.664827586206897e-06, "loss": 0.9894, "step": 647 }, { "epoch": 2.2344827586206897, "grad_norm": 5.129367828369141, "learning_rate": 2.6689655172413793e-06, "loss": 0.981, "step": 648 }, { "epoch": 2.2379310344827585, "grad_norm": 3.3366353511810303, "learning_rate": 2.673103448275862e-06, "loss": 1.0126, "step": 649 }, { "epoch": 2.2413793103448274, "grad_norm": 4.078788757324219, "learning_rate": 2.677241379310345e-06, "loss": 1.0143, "step": 650 }, { "epoch": 2.2448275862068967, "grad_norm": 2.317655563354492, "learning_rate": 2.6813793103448275e-06, "loss": 1.0043, "step": 651 }, { "epoch": 2.2482758620689656, "grad_norm": 6.491637706756592, "learning_rate": 2.6855172413793104e-06, "loss": 1.0085, "step": 652 }, { "epoch": 2.2517241379310344, "grad_norm": 2.7804360389709473, "learning_rate": 2.6896551724137932e-06, "loss": 1.0384, "step": 653 }, { "epoch": 2.2551724137931033, "grad_norm": 3.8283960819244385, "learning_rate": 2.6937931034482757e-06, "loss": 1.1078, "step": 654 }, { "epoch": 2.2586206896551726, "grad_norm": 5.549128532409668, "learning_rate": 2.6979310344827586e-06, "loss": 1.3286, "step": 655 }, { "epoch": 2.2620689655172415, "grad_norm": 2.9716689586639404, "learning_rate": 2.7020689655172414e-06, "loss": 0.9922, "step": 656 }, { "epoch": 2.2655172413793103, "grad_norm": 1.9567259550094604, "learning_rate": 2.706206896551724e-06, "loss": 0.9369, "step": 657 }, { "epoch": 2.268965517241379, "grad_norm": 5.108131408691406, "learning_rate": 2.7103448275862067e-06, "loss": 0.9559, "step": 658 }, { "epoch": 2.272413793103448, "grad_norm": 1.4260375499725342, "learning_rate": 2.7144827586206896e-06, "loss": 0.9233, "step": 659 }, { "epoch": 2.2758620689655173, "grad_norm": 4.820898056030273, "learning_rate": 2.718620689655173e-06, "loss": 0.9339, "step": 660 }, { "epoch": 2.279310344827586, "grad_norm": 2.788088798522949, "learning_rate": 2.7227586206896553e-06, "loss": 0.9193, "step": 661 }, { "epoch": 2.282758620689655, "grad_norm": 1.8852156400680542, "learning_rate": 2.726896551724138e-06, "loss": 0.9076, "step": 662 }, { "epoch": 2.2862068965517244, "grad_norm": 1.6354094743728638, "learning_rate": 2.731034482758621e-06, "loss": 0.8927, "step": 663 }, { "epoch": 2.2896551724137932, "grad_norm": 1.6787278652191162, "learning_rate": 2.7351724137931035e-06, "loss": 0.8944, "step": 664 }, { "epoch": 2.293103448275862, "grad_norm": 4.576758861541748, "learning_rate": 2.7393103448275864e-06, "loss": 0.9556, "step": 665 }, { "epoch": 2.296551724137931, "grad_norm": 3.0693395137786865, "learning_rate": 2.7434482758620692e-06, "loss": 0.9329, "step": 666 }, { "epoch": 2.3, "grad_norm": 2.167356491088867, "learning_rate": 2.7475862068965517e-06, "loss": 0.9596, "step": 667 }, { "epoch": 2.303448275862069, "grad_norm": 2.850677013397217, "learning_rate": 2.7517241379310345e-06, "loss": 0.9202, "step": 668 }, { "epoch": 2.306896551724138, "grad_norm": 2.9239416122436523, "learning_rate": 2.7558620689655174e-06, "loss": 0.9137, "step": 669 }, { "epoch": 2.310344827586207, "grad_norm": 5.083246231079102, "learning_rate": 2.76e-06, "loss": 1.0035, "step": 670 }, { "epoch": 2.3137931034482757, "grad_norm": 2.097248077392578, "learning_rate": 2.7641379310344827e-06, "loss": 0.9728, "step": 671 }, { "epoch": 2.317241379310345, "grad_norm": 3.4359378814697266, "learning_rate": 2.7682758620689656e-06, "loss": 0.9374, "step": 672 }, { "epoch": 2.320689655172414, "grad_norm": 4.8909592628479, "learning_rate": 2.7724137931034484e-06, "loss": 0.9311, "step": 673 }, { "epoch": 2.3241379310344827, "grad_norm": 2.9949514865875244, "learning_rate": 2.776551724137931e-06, "loss": 0.9947, "step": 674 }, { "epoch": 2.3275862068965516, "grad_norm": 2.792440414428711, "learning_rate": 2.7806896551724137e-06, "loss": 0.9766, "step": 675 }, { "epoch": 2.3310344827586205, "grad_norm": 2.7395036220550537, "learning_rate": 2.7848275862068966e-06, "loss": 0.941, "step": 676 }, { "epoch": 2.3344827586206898, "grad_norm": 3.2487940788269043, "learning_rate": 2.7889655172413795e-06, "loss": 0.9842, "step": 677 }, { "epoch": 2.3379310344827586, "grad_norm": 3.2322049140930176, "learning_rate": 2.7931034482758623e-06, "loss": 0.9651, "step": 678 }, { "epoch": 2.3413793103448275, "grad_norm": 3.439302921295166, "learning_rate": 2.797241379310345e-06, "loss": 1.0379, "step": 679 }, { "epoch": 2.344827586206897, "grad_norm": 4.784821510314941, "learning_rate": 2.8013793103448276e-06, "loss": 1.2982, "step": 680 }, { "epoch": 2.3482758620689657, "grad_norm": 2.482414960861206, "learning_rate": 2.8055172413793105e-06, "loss": 0.9341, "step": 681 }, { "epoch": 2.3517241379310345, "grad_norm": 2.266867160797119, "learning_rate": 2.8096551724137934e-06, "loss": 0.9005, "step": 682 }, { "epoch": 2.3551724137931034, "grad_norm": 2.868816614151001, "learning_rate": 2.813793103448276e-06, "loss": 0.8497, "step": 683 }, { "epoch": 2.3586206896551722, "grad_norm": 1.9789481163024902, "learning_rate": 2.8179310344827587e-06, "loss": 0.8776, "step": 684 }, { "epoch": 2.3620689655172415, "grad_norm": 3.446631908416748, "learning_rate": 2.8220689655172415e-06, "loss": 0.8577, "step": 685 }, { "epoch": 2.3655172413793104, "grad_norm": 1.4721077680587769, "learning_rate": 2.826206896551724e-06, "loss": 0.8225, "step": 686 }, { "epoch": 2.3689655172413793, "grad_norm": 1.8748798370361328, "learning_rate": 2.830344827586207e-06, "loss": 0.8932, "step": 687 }, { "epoch": 2.372413793103448, "grad_norm": 2.196218729019165, "learning_rate": 2.8344827586206897e-06, "loss": 0.8991, "step": 688 }, { "epoch": 2.3758620689655174, "grad_norm": 1.8698573112487793, "learning_rate": 2.8386206896551726e-06, "loss": 0.8559, "step": 689 }, { "epoch": 2.3793103448275863, "grad_norm": 2.3785715103149414, "learning_rate": 2.842758620689655e-06, "loss": 0.9205, "step": 690 }, { "epoch": 2.382758620689655, "grad_norm": 2.0262176990509033, "learning_rate": 2.846896551724138e-06, "loss": 0.88, "step": 691 }, { "epoch": 2.386206896551724, "grad_norm": 1.9961202144622803, "learning_rate": 2.8510344827586207e-06, "loss": 0.9553, "step": 692 }, { "epoch": 2.389655172413793, "grad_norm": 2.076063394546509, "learning_rate": 2.855172413793103e-06, "loss": 0.905, "step": 693 }, { "epoch": 2.393103448275862, "grad_norm": 2.5672008991241455, "learning_rate": 2.8593103448275865e-06, "loss": 0.9116, "step": 694 }, { "epoch": 2.396551724137931, "grad_norm": 3.705395221710205, "learning_rate": 2.8634482758620693e-06, "loss": 0.8949, "step": 695 }, { "epoch": 2.4, "grad_norm": 3.239957332611084, "learning_rate": 2.8675862068965518e-06, "loss": 0.9491, "step": 696 }, { "epoch": 2.4034482758620688, "grad_norm": 2.6844630241394043, "learning_rate": 2.8717241379310346e-06, "loss": 0.8795, "step": 697 }, { "epoch": 2.406896551724138, "grad_norm": 3.229221820831299, "learning_rate": 2.8758620689655175e-06, "loss": 0.9375, "step": 698 }, { "epoch": 2.410344827586207, "grad_norm": 2.2336463928222656, "learning_rate": 2.88e-06, "loss": 0.9289, "step": 699 }, { "epoch": 2.413793103448276, "grad_norm": 5.213966369628906, "learning_rate": 2.884137931034483e-06, "loss": 0.952, "step": 700 }, { "epoch": 2.4172413793103447, "grad_norm": 3.145204544067383, "learning_rate": 2.8882758620689657e-06, "loss": 0.9965, "step": 701 }, { "epoch": 2.420689655172414, "grad_norm": 9.020147323608398, "learning_rate": 2.892413793103448e-06, "loss": 0.9426, "step": 702 }, { "epoch": 2.424137931034483, "grad_norm": 2.901129961013794, "learning_rate": 2.896551724137931e-06, "loss": 0.9713, "step": 703 }, { "epoch": 2.4275862068965517, "grad_norm": 8.80194091796875, "learning_rate": 2.900689655172414e-06, "loss": 1.0426, "step": 704 }, { "epoch": 2.4310344827586206, "grad_norm": 6.164482593536377, "learning_rate": 2.9048275862068967e-06, "loss": 1.1385, "step": 705 }, { "epoch": 2.43448275862069, "grad_norm": 2.836442232131958, "learning_rate": 2.908965517241379e-06, "loss": 0.8834, "step": 706 }, { "epoch": 2.4379310344827587, "grad_norm": 1.7019377946853638, "learning_rate": 2.913103448275862e-06, "loss": 0.8489, "step": 707 }, { "epoch": 2.4413793103448276, "grad_norm": 2.4802653789520264, "learning_rate": 2.917241379310345e-06, "loss": 0.8481, "step": 708 }, { "epoch": 2.4448275862068964, "grad_norm": 2.07865834236145, "learning_rate": 2.9213793103448273e-06, "loss": 0.8492, "step": 709 }, { "epoch": 2.4482758620689653, "grad_norm": 3.464520215988159, "learning_rate": 2.9255172413793106e-06, "loss": 0.8111, "step": 710 }, { "epoch": 2.4517241379310346, "grad_norm": 7.777944564819336, "learning_rate": 2.9296551724137935e-06, "loss": 0.8564, "step": 711 }, { "epoch": 2.4551724137931035, "grad_norm": 5.556765079498291, "learning_rate": 2.933793103448276e-06, "loss": 0.8388, "step": 712 }, { "epoch": 2.4586206896551723, "grad_norm": 2.0672924518585205, "learning_rate": 2.937931034482759e-06, "loss": 0.8939, "step": 713 }, { "epoch": 2.462068965517241, "grad_norm": 2.213162660598755, "learning_rate": 2.9420689655172417e-06, "loss": 0.8239, "step": 714 }, { "epoch": 2.4655172413793105, "grad_norm": 2.5326108932495117, "learning_rate": 2.946206896551724e-06, "loss": 0.8498, "step": 715 }, { "epoch": 2.4689655172413794, "grad_norm": 2.7068302631378174, "learning_rate": 2.950344827586207e-06, "loss": 0.8254, "step": 716 }, { "epoch": 2.472413793103448, "grad_norm": 2.2862157821655273, "learning_rate": 2.95448275862069e-06, "loss": 0.8435, "step": 717 }, { "epoch": 2.475862068965517, "grad_norm": 2.077944040298462, "learning_rate": 2.9586206896551727e-06, "loss": 0.8295, "step": 718 }, { "epoch": 2.4793103448275864, "grad_norm": 2.9760327339172363, "learning_rate": 2.962758620689655e-06, "loss": 0.8656, "step": 719 }, { "epoch": 2.4827586206896552, "grad_norm": 2.197108030319214, "learning_rate": 2.966896551724138e-06, "loss": 0.9264, "step": 720 }, { "epoch": 2.486206896551724, "grad_norm": 1.7395992279052734, "learning_rate": 2.971034482758621e-06, "loss": 0.8844, "step": 721 }, { "epoch": 2.489655172413793, "grad_norm": 3.330944538116455, "learning_rate": 2.9751724137931033e-06, "loss": 0.8672, "step": 722 }, { "epoch": 2.4931034482758623, "grad_norm": 2.6404941082000732, "learning_rate": 2.979310344827586e-06, "loss": 0.9229, "step": 723 }, { "epoch": 2.496551724137931, "grad_norm": 7.694943428039551, "learning_rate": 2.983448275862069e-06, "loss": 0.8596, "step": 724 }, { "epoch": 2.5, "grad_norm": 2.662382125854492, "learning_rate": 2.9875862068965515e-06, "loss": 0.9547, "step": 725 }, { "epoch": 2.503448275862069, "grad_norm": 3.3554413318634033, "learning_rate": 2.9917241379310343e-06, "loss": 0.9809, "step": 726 }, { "epoch": 2.5068965517241377, "grad_norm": 3.515193223953247, "learning_rate": 2.9958620689655176e-06, "loss": 0.9232, "step": 727 }, { "epoch": 2.510344827586207, "grad_norm": 2.9674179553985596, "learning_rate": 3e-06, "loss": 0.9395, "step": 728 }, { "epoch": 2.513793103448276, "grad_norm": 9.792686462402344, "learning_rate": 3.004137931034483e-06, "loss": 1.0432, "step": 729 }, { "epoch": 2.5172413793103448, "grad_norm": 13.67553997039795, "learning_rate": 3.008275862068966e-06, "loss": 1.153, "step": 730 }, { "epoch": 2.520689655172414, "grad_norm": 2.9971377849578857, "learning_rate": 3.0124137931034482e-06, "loss": 0.8608, "step": 731 }, { "epoch": 2.524137931034483, "grad_norm": 1.715222954750061, "learning_rate": 3.016551724137931e-06, "loss": 0.8766, "step": 732 }, { "epoch": 2.527586206896552, "grad_norm": 2.882765293121338, "learning_rate": 3.020689655172414e-06, "loss": 0.8342, "step": 733 }, { "epoch": 2.5310344827586206, "grad_norm": 2.605081558227539, "learning_rate": 3.024827586206897e-06, "loss": 0.8292, "step": 734 }, { "epoch": 2.5344827586206895, "grad_norm": 3.7530148029327393, "learning_rate": 3.0289655172413793e-06, "loss": 0.8582, "step": 735 }, { "epoch": 2.5379310344827584, "grad_norm": 3.5620083808898926, "learning_rate": 3.033103448275862e-06, "loss": 0.8255, "step": 736 }, { "epoch": 2.5413793103448277, "grad_norm": 8.260937690734863, "learning_rate": 3.037241379310345e-06, "loss": 0.8701, "step": 737 }, { "epoch": 2.5448275862068965, "grad_norm": 2.7151646614074707, "learning_rate": 3.0413793103448275e-06, "loss": 0.841, "step": 738 }, { "epoch": 2.5482758620689654, "grad_norm": 1.848530650138855, "learning_rate": 3.0455172413793103e-06, "loss": 0.8269, "step": 739 }, { "epoch": 2.5517241379310347, "grad_norm": 2.6978347301483154, "learning_rate": 3.049655172413793e-06, "loss": 0.7837, "step": 740 }, { "epoch": 2.5551724137931036, "grad_norm": 2.9322566986083984, "learning_rate": 3.0537931034482756e-06, "loss": 0.8553, "step": 741 }, { "epoch": 2.5586206896551724, "grad_norm": 2.7890138626098633, "learning_rate": 3.0579310344827585e-06, "loss": 0.8512, "step": 742 }, { "epoch": 2.5620689655172413, "grad_norm": 4.288728713989258, "learning_rate": 3.0620689655172418e-06, "loss": 0.825, "step": 743 }, { "epoch": 2.56551724137931, "grad_norm": 2.5469136238098145, "learning_rate": 3.0662068965517242e-06, "loss": 0.8801, "step": 744 }, { "epoch": 2.5689655172413794, "grad_norm": 2.081892967224121, "learning_rate": 3.070344827586207e-06, "loss": 0.861, "step": 745 }, { "epoch": 2.5724137931034483, "grad_norm": 2.815885066986084, "learning_rate": 3.07448275862069e-06, "loss": 0.849, "step": 746 }, { "epoch": 2.575862068965517, "grad_norm": 3.3561198711395264, "learning_rate": 3.0786206896551724e-06, "loss": 0.8778, "step": 747 }, { "epoch": 2.5793103448275865, "grad_norm": 2.3429131507873535, "learning_rate": 3.0827586206896553e-06, "loss": 0.8829, "step": 748 }, { "epoch": 2.5827586206896553, "grad_norm": 2.548283576965332, "learning_rate": 3.086896551724138e-06, "loss": 0.9014, "step": 749 }, { "epoch": 2.586206896551724, "grad_norm": 3.8453352451324463, "learning_rate": 3.091034482758621e-06, "loss": 0.8905, "step": 750 }, { "epoch": 2.589655172413793, "grad_norm": 3.531559944152832, "learning_rate": 3.0951724137931034e-06, "loss": 0.9302, "step": 751 }, { "epoch": 2.593103448275862, "grad_norm": 2.982327461242676, "learning_rate": 3.0993103448275863e-06, "loss": 0.9084, "step": 752 }, { "epoch": 2.596551724137931, "grad_norm": 2.8471083641052246, "learning_rate": 3.103448275862069e-06, "loss": 0.9811, "step": 753 }, { "epoch": 2.6, "grad_norm": 3.6485698223114014, "learning_rate": 3.1075862068965516e-06, "loss": 0.9693, "step": 754 }, { "epoch": 2.603448275862069, "grad_norm": 5.537622451782227, "learning_rate": 3.1117241379310345e-06, "loss": 1.1181, "step": 755 }, { "epoch": 2.606896551724138, "grad_norm": 4.091545104980469, "learning_rate": 3.1158620689655173e-06, "loss": 0.9082, "step": 756 }, { "epoch": 2.610344827586207, "grad_norm": 1.8868762254714966, "learning_rate": 3.1199999999999998e-06, "loss": 0.7905, "step": 757 }, { "epoch": 2.613793103448276, "grad_norm": 1.7103972434997559, "learning_rate": 3.1241379310344826e-06, "loss": 0.7803, "step": 758 }, { "epoch": 2.617241379310345, "grad_norm": 2.9469099044799805, "learning_rate": 3.1282758620689655e-06, "loss": 0.7789, "step": 759 }, { "epoch": 2.6206896551724137, "grad_norm": 2.0763840675354004, "learning_rate": 3.1324137931034484e-06, "loss": 0.8216, "step": 760 }, { "epoch": 2.6241379310344826, "grad_norm": 2.242683172225952, "learning_rate": 3.1365517241379312e-06, "loss": 0.8479, "step": 761 }, { "epoch": 2.627586206896552, "grad_norm": 1.4341531991958618, "learning_rate": 3.140689655172414e-06, "loss": 0.806, "step": 762 }, { "epoch": 2.6310344827586207, "grad_norm": 2.1622445583343506, "learning_rate": 3.144827586206897e-06, "loss": 0.8312, "step": 763 }, { "epoch": 2.6344827586206896, "grad_norm": 2.0853993892669678, "learning_rate": 3.1489655172413794e-06, "loss": 0.8254, "step": 764 }, { "epoch": 2.637931034482759, "grad_norm": 1.7636383771896362, "learning_rate": 3.1531034482758623e-06, "loss": 0.8024, "step": 765 }, { "epoch": 2.6413793103448278, "grad_norm": 2.1746997833251953, "learning_rate": 3.157241379310345e-06, "loss": 0.8495, "step": 766 }, { "epoch": 2.6448275862068966, "grad_norm": 1.6781622171401978, "learning_rate": 3.1613793103448276e-06, "loss": 0.8096, "step": 767 }, { "epoch": 2.6482758620689655, "grad_norm": 2.054391622543335, "learning_rate": 3.1655172413793104e-06, "loss": 0.8232, "step": 768 }, { "epoch": 2.6517241379310343, "grad_norm": 1.7386330366134644, "learning_rate": 3.1696551724137933e-06, "loss": 0.8222, "step": 769 }, { "epoch": 2.655172413793103, "grad_norm": 2.9891738891601562, "learning_rate": 3.1737931034482757e-06, "loss": 0.8426, "step": 770 }, { "epoch": 2.6586206896551725, "grad_norm": 1.9876527786254883, "learning_rate": 3.1779310344827586e-06, "loss": 0.7844, "step": 771 }, { "epoch": 2.6620689655172414, "grad_norm": 2.026078939437866, "learning_rate": 3.1820689655172415e-06, "loss": 0.8289, "step": 772 }, { "epoch": 2.6655172413793102, "grad_norm": 3.9277255535125732, "learning_rate": 3.186206896551724e-06, "loss": 0.8367, "step": 773 }, { "epoch": 2.6689655172413795, "grad_norm": 2.4815778732299805, "learning_rate": 3.1903448275862068e-06, "loss": 0.853, "step": 774 }, { "epoch": 2.6724137931034484, "grad_norm": 3.041745662689209, "learning_rate": 3.1944827586206897e-06, "loss": 0.8814, "step": 775 }, { "epoch": 2.6758620689655173, "grad_norm": 2.291062355041504, "learning_rate": 3.198620689655172e-06, "loss": 0.8655, "step": 776 }, { "epoch": 2.679310344827586, "grad_norm": 18.09175682067871, "learning_rate": 3.2027586206896554e-06, "loss": 0.8819, "step": 777 }, { "epoch": 2.682758620689655, "grad_norm": 2.5881481170654297, "learning_rate": 3.2068965517241382e-06, "loss": 0.8768, "step": 778 }, { "epoch": 2.6862068965517243, "grad_norm": 3.716038465499878, "learning_rate": 3.211034482758621e-06, "loss": 0.9345, "step": 779 }, { "epoch": 2.689655172413793, "grad_norm": 5.0855183601379395, "learning_rate": 3.2151724137931036e-06, "loss": 1.169, "step": 780 }, { "epoch": 2.693103448275862, "grad_norm": 2.416203498840332, "learning_rate": 3.2193103448275864e-06, "loss": 0.8531, "step": 781 }, { "epoch": 2.696551724137931, "grad_norm": 1.7603034973144531, "learning_rate": 3.2234482758620693e-06, "loss": 0.7933, "step": 782 }, { "epoch": 2.7, "grad_norm": 3.875692844390869, "learning_rate": 3.2275862068965517e-06, "loss": 0.7475, "step": 783 }, { "epoch": 2.703448275862069, "grad_norm": 1.5397127866744995, "learning_rate": 3.2317241379310346e-06, "loss": 0.7483, "step": 784 }, { "epoch": 2.706896551724138, "grad_norm": 1.5346156358718872, "learning_rate": 3.2358620689655175e-06, "loss": 0.7446, "step": 785 }, { "epoch": 2.7103448275862068, "grad_norm": 1.994001030921936, "learning_rate": 3.24e-06, "loss": 0.7678, "step": 786 }, { "epoch": 2.7137931034482756, "grad_norm": 2.17653751373291, "learning_rate": 3.2441379310344828e-06, "loss": 0.7774, "step": 787 }, { "epoch": 2.717241379310345, "grad_norm": 2.267241954803467, "learning_rate": 3.2482758620689656e-06, "loss": 0.7531, "step": 788 }, { "epoch": 2.720689655172414, "grad_norm": 2.2455215454101562, "learning_rate": 3.252413793103448e-06, "loss": 0.758, "step": 789 }, { "epoch": 2.7241379310344827, "grad_norm": 1.8742413520812988, "learning_rate": 3.256551724137931e-06, "loss": 0.7936, "step": 790 }, { "epoch": 2.727586206896552, "grad_norm": 1.7980722188949585, "learning_rate": 3.260689655172414e-06, "loss": 0.7699, "step": 791 }, { "epoch": 2.731034482758621, "grad_norm": 1.7503551244735718, "learning_rate": 3.2648275862068962e-06, "loss": 0.8452, "step": 792 }, { "epoch": 2.7344827586206897, "grad_norm": 2.1845333576202393, "learning_rate": 3.2689655172413795e-06, "loss": 0.8146, "step": 793 }, { "epoch": 2.7379310344827585, "grad_norm": 3.040494441986084, "learning_rate": 3.2731034482758624e-06, "loss": 0.7813, "step": 794 }, { "epoch": 2.7413793103448274, "grad_norm": 1.9916458129882812, "learning_rate": 3.2772413793103453e-06, "loss": 0.7756, "step": 795 }, { "epoch": 2.7448275862068967, "grad_norm": 1.961888313293457, "learning_rate": 3.2813793103448277e-06, "loss": 0.7726, "step": 796 }, { "epoch": 2.7482758620689656, "grad_norm": 2.3146934509277344, "learning_rate": 3.2855172413793106e-06, "loss": 0.8197, "step": 797 }, { "epoch": 2.7517241379310344, "grad_norm": 2.329580783843994, "learning_rate": 3.2896551724137934e-06, "loss": 0.8397, "step": 798 }, { "epoch": 2.7551724137931033, "grad_norm": 2.1568105220794678, "learning_rate": 3.293793103448276e-06, "loss": 0.7895, "step": 799 }, { "epoch": 2.7586206896551726, "grad_norm": 2.0861778259277344, "learning_rate": 3.2979310344827587e-06, "loss": 0.8359, "step": 800 }, { "epoch": 2.7620689655172415, "grad_norm": 5.5911407470703125, "learning_rate": 3.3020689655172416e-06, "loss": 0.8421, "step": 801 }, { "epoch": 2.7655172413793103, "grad_norm": 4.056722640991211, "learning_rate": 3.306206896551724e-06, "loss": 0.8128, "step": 802 }, { "epoch": 2.768965517241379, "grad_norm": 3.5190210342407227, "learning_rate": 3.310344827586207e-06, "loss": 0.9285, "step": 803 }, { "epoch": 2.772413793103448, "grad_norm": 7.616758346557617, "learning_rate": 3.3144827586206898e-06, "loss": 0.8468, "step": 804 }, { "epoch": 2.7758620689655173, "grad_norm": 3.342083692550659, "learning_rate": 3.3186206896551722e-06, "loss": 1.0774, "step": 805 }, { "epoch": 2.779310344827586, "grad_norm": 2.336209535598755, "learning_rate": 3.322758620689655e-06, "loss": 0.818, "step": 806 }, { "epoch": 2.782758620689655, "grad_norm": 2.5581486225128174, "learning_rate": 3.326896551724138e-06, "loss": 0.7454, "step": 807 }, { "epoch": 2.7862068965517244, "grad_norm": 2.0229716300964355, "learning_rate": 3.331034482758621e-06, "loss": 0.7798, "step": 808 }, { "epoch": 2.7896551724137932, "grad_norm": 3.052600383758545, "learning_rate": 3.3351724137931033e-06, "loss": 0.7516, "step": 809 }, { "epoch": 2.793103448275862, "grad_norm": 3.107213258743286, "learning_rate": 3.3393103448275865e-06, "loss": 0.7597, "step": 810 }, { "epoch": 2.796551724137931, "grad_norm": 8.507162094116211, "learning_rate": 3.3434482758620694e-06, "loss": 0.7414, "step": 811 }, { "epoch": 2.8, "grad_norm": 2.7965245246887207, "learning_rate": 3.347586206896552e-06, "loss": 0.7626, "step": 812 }, { "epoch": 2.803448275862069, "grad_norm": 2.188042640686035, "learning_rate": 3.3517241379310347e-06, "loss": 0.7557, "step": 813 }, { "epoch": 2.806896551724138, "grad_norm": 1.7063300609588623, "learning_rate": 3.3558620689655176e-06, "loss": 0.8007, "step": 814 }, { "epoch": 2.810344827586207, "grad_norm": 1.8098276853561401, "learning_rate": 3.36e-06, "loss": 0.7399, "step": 815 }, { "epoch": 2.8137931034482757, "grad_norm": 3.64959979057312, "learning_rate": 3.364137931034483e-06, "loss": 0.7948, "step": 816 }, { "epoch": 2.817241379310345, "grad_norm": 3.974214553833008, "learning_rate": 3.3682758620689657e-06, "loss": 0.7668, "step": 817 }, { "epoch": 2.820689655172414, "grad_norm": 2.3448379039764404, "learning_rate": 3.372413793103448e-06, "loss": 0.7668, "step": 818 }, { "epoch": 2.8241379310344827, "grad_norm": 3.1560328006744385, "learning_rate": 3.376551724137931e-06, "loss": 0.8169, "step": 819 }, { "epoch": 2.8275862068965516, "grad_norm": 2.842949151992798, "learning_rate": 3.380689655172414e-06, "loss": 0.7906, "step": 820 }, { "epoch": 2.8310344827586205, "grad_norm": 1.8072847127914429, "learning_rate": 3.3848275862068964e-06, "loss": 0.8019, "step": 821 }, { "epoch": 2.8344827586206898, "grad_norm": 2.3653416633605957, "learning_rate": 3.3889655172413792e-06, "loss": 0.8154, "step": 822 }, { "epoch": 2.8379310344827586, "grad_norm": 2.463113784790039, "learning_rate": 3.393103448275862e-06, "loss": 0.7235, "step": 823 }, { "epoch": 2.8413793103448275, "grad_norm": 3.1599032878875732, "learning_rate": 3.397241379310345e-06, "loss": 0.8387, "step": 824 }, { "epoch": 2.844827586206897, "grad_norm": 2.197352409362793, "learning_rate": 3.4013793103448274e-06, "loss": 0.7863, "step": 825 }, { "epoch": 2.8482758620689657, "grad_norm": 3.66207218170166, "learning_rate": 3.4055172413793103e-06, "loss": 0.7849, "step": 826 }, { "epoch": 2.8517241379310345, "grad_norm": 2.1531946659088135, "learning_rate": 3.4096551724137936e-06, "loss": 0.8279, "step": 827 }, { "epoch": 2.8551724137931034, "grad_norm": 2.6813018321990967, "learning_rate": 3.413793103448276e-06, "loss": 0.8317, "step": 828 }, { "epoch": 2.8586206896551722, "grad_norm": 2.5485026836395264, "learning_rate": 3.417931034482759e-06, "loss": 0.8668, "step": 829 }, { "epoch": 2.862068965517241, "grad_norm": 4.1010613441467285, "learning_rate": 3.4220689655172417e-06, "loss": 1.0873, "step": 830 }, { "epoch": 2.8655172413793104, "grad_norm": 1.8779953718185425, "learning_rate": 3.426206896551724e-06, "loss": 0.7795, "step": 831 }, { "epoch": 2.8689655172413793, "grad_norm": 1.7464228868484497, "learning_rate": 3.430344827586207e-06, "loss": 0.7277, "step": 832 }, { "epoch": 2.872413793103448, "grad_norm": 13.424588203430176, "learning_rate": 3.43448275862069e-06, "loss": 0.7293, "step": 833 }, { "epoch": 2.8758620689655174, "grad_norm": 1.7513413429260254, "learning_rate": 3.4386206896551723e-06, "loss": 0.7254, "step": 834 }, { "epoch": 2.8793103448275863, "grad_norm": 2.509876012802124, "learning_rate": 3.442758620689655e-06, "loss": 0.758, "step": 835 }, { "epoch": 2.882758620689655, "grad_norm": 1.696871280670166, "learning_rate": 3.446896551724138e-06, "loss": 0.716, "step": 836 }, { "epoch": 2.886206896551724, "grad_norm": 2.9511871337890625, "learning_rate": 3.4510344827586205e-06, "loss": 0.7746, "step": 837 }, { "epoch": 2.889655172413793, "grad_norm": 2.749140501022339, "learning_rate": 3.4551724137931034e-06, "loss": 0.815, "step": 838 }, { "epoch": 2.893103448275862, "grad_norm": 2.0831105709075928, "learning_rate": 3.4593103448275862e-06, "loss": 0.7194, "step": 839 }, { "epoch": 2.896551724137931, "grad_norm": 1.7938263416290283, "learning_rate": 3.463448275862069e-06, "loss": 0.7183, "step": 840 }, { "epoch": 2.9, "grad_norm": 8.612885475158691, "learning_rate": 3.4675862068965515e-06, "loss": 0.7734, "step": 841 }, { "epoch": 2.903448275862069, "grad_norm": 2.2805774211883545, "learning_rate": 3.4717241379310344e-06, "loss": 0.7667, "step": 842 }, { "epoch": 2.906896551724138, "grad_norm": 1.8321439027786255, "learning_rate": 3.4758620689655177e-06, "loss": 0.7356, "step": 843 }, { "epoch": 2.910344827586207, "grad_norm": 1.8422828912734985, "learning_rate": 3.48e-06, "loss": 0.7444, "step": 844 }, { "epoch": 2.913793103448276, "grad_norm": 2.7624614238739014, "learning_rate": 3.484137931034483e-06, "loss": 0.7869, "step": 845 }, { "epoch": 2.9172413793103447, "grad_norm": 2.9517431259155273, "learning_rate": 3.488275862068966e-06, "loss": 0.792, "step": 846 }, { "epoch": 2.9206896551724135, "grad_norm": 1.7975549697875977, "learning_rate": 3.4924137931034483e-06, "loss": 0.8208, "step": 847 }, { "epoch": 2.924137931034483, "grad_norm": 2.082636833190918, "learning_rate": 3.496551724137931e-06, "loss": 0.8002, "step": 848 }, { "epoch": 2.9275862068965517, "grad_norm": 2.3157737255096436, "learning_rate": 3.500689655172414e-06, "loss": 0.7735, "step": 849 }, { "epoch": 2.9310344827586206, "grad_norm": 2.1787805557250977, "learning_rate": 3.5048275862068965e-06, "loss": 0.798, "step": 850 }, { "epoch": 2.93448275862069, "grad_norm": 3.1368255615234375, "learning_rate": 3.5089655172413793e-06, "loss": 0.7982, "step": 851 }, { "epoch": 2.9379310344827587, "grad_norm": 4.4477858543396, "learning_rate": 3.5131034482758622e-06, "loss": 0.7977, "step": 852 }, { "epoch": 2.9413793103448276, "grad_norm": 2.3506572246551514, "learning_rate": 3.517241379310345e-06, "loss": 0.8249, "step": 853 }, { "epoch": 2.9448275862068964, "grad_norm": 2.569699287414551, "learning_rate": 3.5213793103448275e-06, "loss": 0.8345, "step": 854 }, { "epoch": 2.9482758620689653, "grad_norm": 3.905564785003662, "learning_rate": 3.5255172413793104e-06, "loss": 1.0738, "step": 855 }, { "epoch": 2.9517241379310346, "grad_norm": 1.8683215379714966, "learning_rate": 3.5296551724137932e-06, "loss": 0.7598, "step": 856 }, { "epoch": 2.9551724137931035, "grad_norm": 2.6221985816955566, "learning_rate": 3.5337931034482757e-06, "loss": 0.73, "step": 857 }, { "epoch": 2.9586206896551723, "grad_norm": 1.5847296714782715, "learning_rate": 3.5379310344827586e-06, "loss": 0.7002, "step": 858 }, { "epoch": 2.9620689655172416, "grad_norm": 1.607698917388916, "learning_rate": 3.5420689655172414e-06, "loss": 0.7217, "step": 859 }, { "epoch": 2.9655172413793105, "grad_norm": 1.9299365282058716, "learning_rate": 3.5462068965517243e-06, "loss": 0.721, "step": 860 }, { "epoch": 2.9689655172413794, "grad_norm": 1.7335811853408813, "learning_rate": 3.550344827586207e-06, "loss": 0.7269, "step": 861 }, { "epoch": 2.972413793103448, "grad_norm": 1.5420185327529907, "learning_rate": 3.55448275862069e-06, "loss": 0.7434, "step": 862 }, { "epoch": 2.975862068965517, "grad_norm": 5.4776835441589355, "learning_rate": 3.5586206896551725e-06, "loss": 0.7443, "step": 863 }, { "epoch": 2.979310344827586, "grad_norm": 2.189988851547241, "learning_rate": 3.5627586206896553e-06, "loss": 0.7228, "step": 864 }, { "epoch": 2.9827586206896552, "grad_norm": 3.184882164001465, "learning_rate": 3.566896551724138e-06, "loss": 0.7227, "step": 865 }, { "epoch": 2.986206896551724, "grad_norm": 1.9721134901046753, "learning_rate": 3.5710344827586206e-06, "loss": 0.7455, "step": 866 }, { "epoch": 2.989655172413793, "grad_norm": 2.3161919116973877, "learning_rate": 3.5751724137931035e-06, "loss": 0.7928, "step": 867 }, { "epoch": 2.9931034482758623, "grad_norm": 4.392090320587158, "learning_rate": 3.5793103448275864e-06, "loss": 0.8486, "step": 868 }, { "epoch": 2.996551724137931, "grad_norm": 2.957409143447876, "learning_rate": 3.5834482758620692e-06, "loss": 0.9038, "step": 869 }, { "epoch": 3.0, "grad_norm": 2.7430832386016846, "learning_rate": 3.5875862068965517e-06, "loss": 1.0554, "step": 870 }, { "epoch": 3.003448275862069, "grad_norm": 1.334889531135559, "learning_rate": 3.5917241379310345e-06, "loss": 0.7632, "step": 871 }, { "epoch": 3.0068965517241377, "grad_norm": 1.8028075695037842, "learning_rate": 3.5958620689655174e-06, "loss": 0.7337, "step": 872 }, { "epoch": 3.010344827586207, "grad_norm": 2.755401372909546, "learning_rate": 3.6e-06, "loss": 0.7274, "step": 873 }, { "epoch": 3.013793103448276, "grad_norm": 1.5374211072921753, "learning_rate": 3.6041379310344827e-06, "loss": 0.6879, "step": 874 }, { "epoch": 3.0172413793103448, "grad_norm": 1.611376404762268, "learning_rate": 3.6082758620689656e-06, "loss": 0.7344, "step": 875 }, { "epoch": 3.0206896551724136, "grad_norm": 1.421128511428833, "learning_rate": 3.6124137931034484e-06, "loss": 0.697, "step": 876 }, { "epoch": 3.024137931034483, "grad_norm": 1.7592328786849976, "learning_rate": 3.6165517241379313e-06, "loss": 0.7013, "step": 877 }, { "epoch": 3.027586206896552, "grad_norm": 2.1840600967407227, "learning_rate": 3.620689655172414e-06, "loss": 0.7164, "step": 878 }, { "epoch": 3.0310344827586206, "grad_norm": 1.5755367279052734, "learning_rate": 3.6248275862068966e-06, "loss": 0.6773, "step": 879 }, { "epoch": 3.0344827586206895, "grad_norm": 1.9003515243530273, "learning_rate": 3.6289655172413795e-06, "loss": 0.6991, "step": 880 }, { "epoch": 3.037931034482759, "grad_norm": 1.8994415998458862, "learning_rate": 3.6331034482758623e-06, "loss": 0.7516, "step": 881 }, { "epoch": 3.0413793103448277, "grad_norm": 2.1603469848632812, "learning_rate": 3.6372413793103448e-06, "loss": 0.7199, "step": 882 }, { "epoch": 3.0448275862068965, "grad_norm": 2.4857966899871826, "learning_rate": 3.6413793103448276e-06, "loss": 0.7289, "step": 883 }, { "epoch": 3.0482758620689654, "grad_norm": 3.5642359256744385, "learning_rate": 3.6455172413793105e-06, "loss": 0.7836, "step": 884 }, { "epoch": 3.0517241379310347, "grad_norm": 2.505202293395996, "learning_rate": 3.6496551724137934e-06, "loss": 0.7342, "step": 885 }, { "epoch": 3.0551724137931036, "grad_norm": 3.029014825820923, "learning_rate": 3.653793103448276e-06, "loss": 0.7141, "step": 886 }, { "epoch": 3.0586206896551724, "grad_norm": 2.350893497467041, "learning_rate": 3.6579310344827587e-06, "loss": 0.7305, "step": 887 }, { "epoch": 3.0620689655172413, "grad_norm": 2.134194850921631, "learning_rate": 3.6620689655172415e-06, "loss": 0.7229, "step": 888 }, { "epoch": 3.06551724137931, "grad_norm": 2.2232165336608887, "learning_rate": 3.666206896551724e-06, "loss": 0.7624, "step": 889 }, { "epoch": 3.0689655172413794, "grad_norm": 2.3695156574249268, "learning_rate": 3.670344827586207e-06, "loss": 0.829, "step": 890 }, { "epoch": 3.0724137931034483, "grad_norm": 5.618178367614746, "learning_rate": 3.6744827586206897e-06, "loss": 0.8199, "step": 891 }, { "epoch": 3.075862068965517, "grad_norm": 2.559140205383301, "learning_rate": 3.678620689655172e-06, "loss": 0.7397, "step": 892 }, { "epoch": 3.079310344827586, "grad_norm": 3.3309595584869385, "learning_rate": 3.6827586206896554e-06, "loss": 0.8454, "step": 893 }, { "epoch": 3.0827586206896553, "grad_norm": 6.810276508331299, "learning_rate": 3.6868965517241383e-06, "loss": 0.7896, "step": 894 }, { "epoch": 3.086206896551724, "grad_norm": 4.904645919799805, "learning_rate": 3.6910344827586208e-06, "loss": 1.0055, "step": 895 }, { "epoch": 3.089655172413793, "grad_norm": 1.9213762283325195, "learning_rate": 3.6951724137931036e-06, "loss": 0.7575, "step": 896 }, { "epoch": 3.093103448275862, "grad_norm": 1.2551428079605103, "learning_rate": 3.6993103448275865e-06, "loss": 0.6989, "step": 897 }, { "epoch": 3.0965517241379312, "grad_norm": 1.5423758029937744, "learning_rate": 3.703448275862069e-06, "loss": 0.6625, "step": 898 }, { "epoch": 3.1, "grad_norm": 1.6449401378631592, "learning_rate": 3.7075862068965518e-06, "loss": 0.6627, "step": 899 }, { "epoch": 3.103448275862069, "grad_norm": 1.9276618957519531, "learning_rate": 3.7117241379310347e-06, "loss": 0.7264, "step": 900 }, { "epoch": 3.106896551724138, "grad_norm": 3.1674468517303467, "learning_rate": 3.7158620689655175e-06, "loss": 0.6986, "step": 901 }, { "epoch": 3.110344827586207, "grad_norm": 1.4597328901290894, "learning_rate": 3.72e-06, "loss": 0.7048, "step": 902 }, { "epoch": 3.113793103448276, "grad_norm": 1.9029788970947266, "learning_rate": 3.724137931034483e-06, "loss": 0.651, "step": 903 }, { "epoch": 3.117241379310345, "grad_norm": 1.51195228099823, "learning_rate": 3.7282758620689657e-06, "loss": 0.6666, "step": 904 }, { "epoch": 3.1206896551724137, "grad_norm": 1.5542526245117188, "learning_rate": 3.732413793103448e-06, "loss": 0.6765, "step": 905 }, { "epoch": 3.1241379310344826, "grad_norm": 3.501437187194824, "learning_rate": 3.736551724137931e-06, "loss": 0.6847, "step": 906 }, { "epoch": 3.127586206896552, "grad_norm": 2.3663346767425537, "learning_rate": 3.740689655172414e-06, "loss": 0.6902, "step": 907 }, { "epoch": 3.1310344827586207, "grad_norm": 1.93671715259552, "learning_rate": 3.7448275862068963e-06, "loss": 0.7345, "step": 908 }, { "epoch": 3.1344827586206896, "grad_norm": 1.8789557218551636, "learning_rate": 3.748965517241379e-06, "loss": 0.6751, "step": 909 }, { "epoch": 3.1379310344827585, "grad_norm": 3.0305957794189453, "learning_rate": 3.753103448275862e-06, "loss": 0.7612, "step": 910 }, { "epoch": 3.1413793103448278, "grad_norm": 2.0694868564605713, "learning_rate": 3.7572413793103445e-06, "loss": 0.7108, "step": 911 }, { "epoch": 3.1448275862068966, "grad_norm": 2.0459964275360107, "learning_rate": 3.7613793103448273e-06, "loss": 0.7315, "step": 912 }, { "epoch": 3.1482758620689655, "grad_norm": 2.2010529041290283, "learning_rate": 3.76551724137931e-06, "loss": 0.6872, "step": 913 }, { "epoch": 3.1517241379310343, "grad_norm": 1.8128341436386108, "learning_rate": 3.769655172413793e-06, "loss": 0.7565, "step": 914 }, { "epoch": 3.1551724137931036, "grad_norm": 2.470078706741333, "learning_rate": 3.7737931034482755e-06, "loss": 0.7145, "step": 915 }, { "epoch": 3.1586206896551725, "grad_norm": 2.150045394897461, "learning_rate": 3.7779310344827584e-06, "loss": 0.7503, "step": 916 }, { "epoch": 3.1620689655172414, "grad_norm": 6.0240092277526855, "learning_rate": 3.7820689655172412e-06, "loss": 0.753, "step": 917 }, { "epoch": 3.1655172413793102, "grad_norm": 2.405945062637329, "learning_rate": 3.7862068965517245e-06, "loss": 0.7942, "step": 918 }, { "epoch": 3.1689655172413795, "grad_norm": 3.8100087642669678, "learning_rate": 3.7903448275862074e-06, "loss": 0.9169, "step": 919 }, { "epoch": 3.1724137931034484, "grad_norm": 5.8316450119018555, "learning_rate": 3.7944827586206903e-06, "loss": 1.0303, "step": 920 }, { "epoch": 3.1758620689655173, "grad_norm": 1.8242700099945068, "learning_rate": 3.7986206896551727e-06, "loss": 0.7193, "step": 921 }, { "epoch": 3.179310344827586, "grad_norm": 1.6818137168884277, "learning_rate": 3.8027586206896556e-06, "loss": 0.686, "step": 922 }, { "epoch": 3.182758620689655, "grad_norm": 1.3422383069992065, "learning_rate": 3.8068965517241384e-06, "loss": 0.6953, "step": 923 }, { "epoch": 3.1862068965517243, "grad_norm": 1.345848798751831, "learning_rate": 3.811034482758621e-06, "loss": 0.652, "step": 924 }, { "epoch": 3.189655172413793, "grad_norm": 1.661600947380066, "learning_rate": 3.815172413793104e-06, "loss": 0.6766, "step": 925 }, { "epoch": 3.193103448275862, "grad_norm": 1.6742887496948242, "learning_rate": 3.819310344827586e-06, "loss": 0.7304, "step": 926 }, { "epoch": 3.196551724137931, "grad_norm": 1.6975849866867065, "learning_rate": 3.823448275862069e-06, "loss": 0.6839, "step": 927 }, { "epoch": 3.2, "grad_norm": 1.4084980487823486, "learning_rate": 3.827586206896552e-06, "loss": 0.7007, "step": 928 }, { "epoch": 3.203448275862069, "grad_norm": 1.9435389041900635, "learning_rate": 3.831724137931035e-06, "loss": 0.6728, "step": 929 }, { "epoch": 3.206896551724138, "grad_norm": 1.5551739931106567, "learning_rate": 3.835862068965518e-06, "loss": 0.7022, "step": 930 }, { "epoch": 3.2103448275862068, "grad_norm": 2.084561824798584, "learning_rate": 3.8400000000000005e-06, "loss": 0.6818, "step": 931 }, { "epoch": 3.213793103448276, "grad_norm": 1.5992895364761353, "learning_rate": 3.8441379310344825e-06, "loss": 0.6999, "step": 932 }, { "epoch": 3.217241379310345, "grad_norm": 3.469864845275879, "learning_rate": 3.848275862068965e-06, "loss": 0.6719, "step": 933 }, { "epoch": 3.220689655172414, "grad_norm": 1.322084903717041, "learning_rate": 3.852413793103448e-06, "loss": 0.688, "step": 934 }, { "epoch": 3.2241379310344827, "grad_norm": 1.8061851263046265, "learning_rate": 3.856551724137931e-06, "loss": 0.7324, "step": 935 }, { "epoch": 3.227586206896552, "grad_norm": 1.6382994651794434, "learning_rate": 3.860689655172414e-06, "loss": 0.6899, "step": 936 }, { "epoch": 3.231034482758621, "grad_norm": 2.558715581893921, "learning_rate": 3.864827586206897e-06, "loss": 0.6914, "step": 937 }, { "epoch": 3.2344827586206897, "grad_norm": 2.432694435119629, "learning_rate": 3.86896551724138e-06, "loss": 0.7127, "step": 938 }, { "epoch": 3.2379310344827585, "grad_norm": 1.6806200742721558, "learning_rate": 3.873103448275862e-06, "loss": 0.7119, "step": 939 }, { "epoch": 3.2413793103448274, "grad_norm": 2.258993148803711, "learning_rate": 3.877241379310345e-06, "loss": 0.6884, "step": 940 }, { "epoch": 3.2448275862068967, "grad_norm": 2.2814154624938965, "learning_rate": 3.8813793103448275e-06, "loss": 0.7863, "step": 941 }, { "epoch": 3.2482758620689656, "grad_norm": 3.272803544998169, "learning_rate": 3.88551724137931e-06, "loss": 0.7517, "step": 942 }, { "epoch": 3.2517241379310344, "grad_norm": 1.9104044437408447, "learning_rate": 3.889655172413793e-06, "loss": 0.745, "step": 943 }, { "epoch": 3.2551724137931033, "grad_norm": 2.75915789604187, "learning_rate": 3.893793103448276e-06, "loss": 0.8653, "step": 944 }, { "epoch": 3.2586206896551726, "grad_norm": 3.3236641883850098, "learning_rate": 3.897931034482758e-06, "loss": 1.0765, "step": 945 }, { "epoch": 3.2620689655172415, "grad_norm": 2.3585612773895264, "learning_rate": 3.902068965517241e-06, "loss": 0.7139, "step": 946 }, { "epoch": 3.2655172413793103, "grad_norm": 1.578764796257019, "learning_rate": 3.906206896551724e-06, "loss": 0.6755, "step": 947 }, { "epoch": 3.268965517241379, "grad_norm": 1.7136107683181763, "learning_rate": 3.910344827586207e-06, "loss": 0.6387, "step": 948 }, { "epoch": 3.272413793103448, "grad_norm": 2.786057233810425, "learning_rate": 3.9144827586206895e-06, "loss": 0.6607, "step": 949 }, { "epoch": 3.2758620689655173, "grad_norm": 1.4657615423202515, "learning_rate": 3.918620689655172e-06, "loss": 0.5975, "step": 950 }, { "epoch": 3.279310344827586, "grad_norm": 2.588430643081665, "learning_rate": 3.922758620689655e-06, "loss": 0.6663, "step": 951 }, { "epoch": 3.282758620689655, "grad_norm": 2.64264178276062, "learning_rate": 3.926896551724138e-06, "loss": 0.6729, "step": 952 }, { "epoch": 3.2862068965517244, "grad_norm": 1.3151711225509644, "learning_rate": 3.931034482758621e-06, "loss": 0.6491, "step": 953 }, { "epoch": 3.2896551724137932, "grad_norm": 1.4630707502365112, "learning_rate": 3.935172413793104e-06, "loss": 0.6586, "step": 954 }, { "epoch": 3.293103448275862, "grad_norm": 1.556369662284851, "learning_rate": 3.939310344827587e-06, "loss": 0.688, "step": 955 }, { "epoch": 3.296551724137931, "grad_norm": 2.0628769397735596, "learning_rate": 3.94344827586207e-06, "loss": 0.7021, "step": 956 }, { "epoch": 3.3, "grad_norm": 1.5650904178619385, "learning_rate": 3.9475862068965525e-06, "loss": 0.6974, "step": 957 }, { "epoch": 3.303448275862069, "grad_norm": 5.116696357727051, "learning_rate": 3.9517241379310345e-06, "loss": 0.6844, "step": 958 }, { "epoch": 3.306896551724138, "grad_norm": 1.2801464796066284, "learning_rate": 3.955862068965517e-06, "loss": 0.6817, "step": 959 }, { "epoch": 3.310344827586207, "grad_norm": 2.13734769821167, "learning_rate": 3.96e-06, "loss": 0.7125, "step": 960 }, { "epoch": 3.3137931034482757, "grad_norm": 1.516658902168274, "learning_rate": 3.964137931034483e-06, "loss": 0.6915, "step": 961 }, { "epoch": 3.317241379310345, "grad_norm": 1.7300668954849243, "learning_rate": 3.968275862068966e-06, "loss": 0.6966, "step": 962 }, { "epoch": 3.320689655172414, "grad_norm": 3.1564879417419434, "learning_rate": 3.972413793103449e-06, "loss": 0.7668, "step": 963 }, { "epoch": 3.3241379310344827, "grad_norm": 1.8688613176345825, "learning_rate": 3.976551724137931e-06, "loss": 0.6513, "step": 964 }, { "epoch": 3.3275862068965516, "grad_norm": 1.8556026220321655, "learning_rate": 3.980689655172414e-06, "loss": 0.7476, "step": 965 }, { "epoch": 3.3310344827586205, "grad_norm": 2.1351943016052246, "learning_rate": 3.9848275862068965e-06, "loss": 0.7414, "step": 966 }, { "epoch": 3.3344827586206898, "grad_norm": 2.599860429763794, "learning_rate": 3.988965517241379e-06, "loss": 0.7321, "step": 967 }, { "epoch": 3.3379310344827586, "grad_norm": 10.060998916625977, "learning_rate": 3.993103448275862e-06, "loss": 0.7693, "step": 968 }, { "epoch": 3.3413793103448275, "grad_norm": 2.6665871143341064, "learning_rate": 3.997241379310345e-06, "loss": 0.7477, "step": 969 }, { "epoch": 3.344827586206897, "grad_norm": 4.878159523010254, "learning_rate": 4.001379310344828e-06, "loss": 1.0054, "step": 970 }, { "epoch": 3.3482758620689657, "grad_norm": 2.2865562438964844, "learning_rate": 4.00551724137931e-06, "loss": 0.7418, "step": 971 }, { "epoch": 3.3517241379310345, "grad_norm": 3.58701753616333, "learning_rate": 4.009655172413793e-06, "loss": 0.6354, "step": 972 }, { "epoch": 3.3551724137931034, "grad_norm": 1.9797636270523071, "learning_rate": 4.013793103448276e-06, "loss": 0.668, "step": 973 }, { "epoch": 3.3586206896551722, "grad_norm": 1.908616542816162, "learning_rate": 4.017931034482759e-06, "loss": 0.668, "step": 974 }, { "epoch": 3.3620689655172415, "grad_norm": 1.4296648502349854, "learning_rate": 4.0220689655172415e-06, "loss": 0.6477, "step": 975 }, { "epoch": 3.3655172413793104, "grad_norm": 1.814042091369629, "learning_rate": 4.026206896551724e-06, "loss": 0.6403, "step": 976 }, { "epoch": 3.3689655172413793, "grad_norm": 1.393262267112732, "learning_rate": 4.030344827586206e-06, "loss": 0.637, "step": 977 }, { "epoch": 3.372413793103448, "grad_norm": 1.448815941810608, "learning_rate": 4.034482758620689e-06, "loss": 0.6648, "step": 978 }, { "epoch": 3.3758620689655174, "grad_norm": 1.4277892112731934, "learning_rate": 4.038620689655172e-06, "loss": 0.6181, "step": 979 }, { "epoch": 3.3793103448275863, "grad_norm": 1.604138970375061, "learning_rate": 4.042758620689655e-06, "loss": 0.6472, "step": 980 }, { "epoch": 3.382758620689655, "grad_norm": 1.373464822769165, "learning_rate": 4.046896551724138e-06, "loss": 0.6354, "step": 981 }, { "epoch": 3.386206896551724, "grad_norm": 1.5067700147628784, "learning_rate": 4.051034482758621e-06, "loss": 0.698, "step": 982 }, { "epoch": 3.389655172413793, "grad_norm": 1.5531679391860962, "learning_rate": 4.0551724137931036e-06, "loss": 0.6393, "step": 983 }, { "epoch": 3.393103448275862, "grad_norm": 1.5208581686019897, "learning_rate": 4.0593103448275856e-06, "loss": 0.685, "step": 984 }, { "epoch": 3.396551724137931, "grad_norm": 1.4621888399124146, "learning_rate": 4.063448275862069e-06, "loss": 0.7137, "step": 985 }, { "epoch": 3.4, "grad_norm": 1.9267560243606567, "learning_rate": 4.067586206896552e-06, "loss": 0.6545, "step": 986 }, { "epoch": 3.4034482758620688, "grad_norm": 1.9730162620544434, "learning_rate": 4.071724137931035e-06, "loss": 0.6789, "step": 987 }, { "epoch": 3.406896551724138, "grad_norm": 2.1273791790008545, "learning_rate": 4.075862068965518e-06, "loss": 0.7029, "step": 988 }, { "epoch": 3.410344827586207, "grad_norm": 1.8510864973068237, "learning_rate": 4.080000000000001e-06, "loss": 0.723, "step": 989 }, { "epoch": 3.413793103448276, "grad_norm": 1.9860560894012451, "learning_rate": 4.084137931034483e-06, "loss": 0.7007, "step": 990 }, { "epoch": 3.4172413793103447, "grad_norm": 1.9288225173950195, "learning_rate": 4.088275862068966e-06, "loss": 0.7093, "step": 991 }, { "epoch": 3.420689655172414, "grad_norm": 1.5931713581085205, "learning_rate": 4.0924137931034485e-06, "loss": 0.7286, "step": 992 }, { "epoch": 3.424137931034483, "grad_norm": 2.527557134628296, "learning_rate": 4.096551724137931e-06, "loss": 0.7175, "step": 993 }, { "epoch": 3.4275862068965517, "grad_norm": 2.1423404216766357, "learning_rate": 4.100689655172414e-06, "loss": 0.7742, "step": 994 }, { "epoch": 3.4310344827586206, "grad_norm": 5.6127495765686035, "learning_rate": 4.104827586206897e-06, "loss": 1.0017, "step": 995 }, { "epoch": 3.43448275862069, "grad_norm": 2.0517547130584717, "learning_rate": 4.10896551724138e-06, "loss": 0.6979, "step": 996 }, { "epoch": 3.4379310344827587, "grad_norm": 1.2114652395248413, "learning_rate": 4.113103448275862e-06, "loss": 0.6945, "step": 997 }, { "epoch": 3.4413793103448276, "grad_norm": 1.4787670373916626, "learning_rate": 4.117241379310345e-06, "loss": 0.6426, "step": 998 }, { "epoch": 3.4448275862068964, "grad_norm": 1.8686472177505493, "learning_rate": 4.121379310344828e-06, "loss": 0.6339, "step": 999 }, { "epoch": 3.4482758620689653, "grad_norm": 2.5821805000305176, "learning_rate": 4.1255172413793106e-06, "loss": 0.638, "step": 1000 }, { "epoch": 3.4482758620689653, "eval_cer": 0.20858785020232481, "eval_loss": 0.6548400521278381, "eval_runtime": 18.4432, "eval_samples_per_second": 50.262, "eval_steps_per_second": 0.163, "eval_wer": 0.5546066252587992, "step": 1000 }, { "epoch": 3.4517241379310346, "grad_norm": 1.2701741456985474, "learning_rate": 4.1296551724137934e-06, "loss": 0.6152, "step": 1001 }, { "epoch": 3.4551724137931035, "grad_norm": 7.228919506072998, "learning_rate": 4.133793103448276e-06, "loss": 0.6643, "step": 1002 }, { "epoch": 3.4586206896551723, "grad_norm": 1.539853572845459, "learning_rate": 4.137931034482758e-06, "loss": 0.6645, "step": 1003 }, { "epoch": 3.462068965517241, "grad_norm": 1.6449264287948608, "learning_rate": 4.142068965517241e-06, "loss": 0.6485, "step": 1004 }, { "epoch": 3.4655172413793105, "grad_norm": 2.0587871074676514, "learning_rate": 4.146206896551724e-06, "loss": 0.6404, "step": 1005 }, { "epoch": 3.4689655172413794, "grad_norm": 1.6748193502426147, "learning_rate": 4.150344827586207e-06, "loss": 0.6644, "step": 1006 }, { "epoch": 3.472413793103448, "grad_norm": 2.048328399658203, "learning_rate": 4.15448275862069e-06, "loss": 0.6485, "step": 1007 }, { "epoch": 3.475862068965517, "grad_norm": 1.4788068532943726, "learning_rate": 4.158620689655173e-06, "loss": 0.6642, "step": 1008 }, { "epoch": 3.4793103448275864, "grad_norm": 1.8029149770736694, "learning_rate": 4.162758620689655e-06, "loss": 0.6659, "step": 1009 }, { "epoch": 3.4827586206896552, "grad_norm": 3.157318353652954, "learning_rate": 4.1668965517241375e-06, "loss": 0.654, "step": 1010 }, { "epoch": 3.486206896551724, "grad_norm": 2.2937588691711426, "learning_rate": 4.17103448275862e-06, "loss": 0.6615, "step": 1011 }, { "epoch": 3.489655172413793, "grad_norm": 1.8290549516677856, "learning_rate": 4.175172413793103e-06, "loss": 0.6551, "step": 1012 }, { "epoch": 3.4931034482758623, "grad_norm": 2.3619654178619385, "learning_rate": 4.179310344827586e-06, "loss": 0.6871, "step": 1013 }, { "epoch": 3.496551724137931, "grad_norm": 2.5140185356140137, "learning_rate": 4.183448275862069e-06, "loss": 0.6978, "step": 1014 }, { "epoch": 3.5, "grad_norm": 2.6015191078186035, "learning_rate": 4.187586206896552e-06, "loss": 0.7591, "step": 1015 }, { "epoch": 3.503448275862069, "grad_norm": 2.592700719833374, "learning_rate": 4.191724137931034e-06, "loss": 0.7, "step": 1016 }, { "epoch": 3.5068965517241377, "grad_norm": 1.8613988161087036, "learning_rate": 4.195862068965517e-06, "loss": 0.7178, "step": 1017 }, { "epoch": 3.510344827586207, "grad_norm": 2.3874917030334473, "learning_rate": 4.2000000000000004e-06, "loss": 0.7445, "step": 1018 }, { "epoch": 3.513793103448276, "grad_norm": 2.252678155899048, "learning_rate": 4.204137931034483e-06, "loss": 0.7752, "step": 1019 }, { "epoch": 3.5172413793103448, "grad_norm": 3.500579833984375, "learning_rate": 4.208275862068966e-06, "loss": 0.9124, "step": 1020 }, { "epoch": 3.520689655172414, "grad_norm": 1.3122608661651611, "learning_rate": 4.212413793103449e-06, "loss": 0.6853, "step": 1021 }, { "epoch": 3.524137931034483, "grad_norm": 1.8165496587753296, "learning_rate": 4.216551724137931e-06, "loss": 0.6075, "step": 1022 }, { "epoch": 3.527586206896552, "grad_norm": 2.0591788291931152, "learning_rate": 4.220689655172414e-06, "loss": 0.6146, "step": 1023 }, { "epoch": 3.5310344827586206, "grad_norm": 1.3496644496917725, "learning_rate": 4.224827586206897e-06, "loss": 0.6235, "step": 1024 }, { "epoch": 3.5344827586206895, "grad_norm": 1.4400564432144165, "learning_rate": 4.22896551724138e-06, "loss": 0.6472, "step": 1025 }, { "epoch": 3.5379310344827584, "grad_norm": 1.599706768989563, "learning_rate": 4.2331034482758625e-06, "loss": 0.5951, "step": 1026 }, { "epoch": 3.5413793103448277, "grad_norm": 1.4673181772232056, "learning_rate": 4.237241379310345e-06, "loss": 0.6411, "step": 1027 }, { "epoch": 3.5448275862068965, "grad_norm": 1.4737690687179565, "learning_rate": 4.241379310344828e-06, "loss": 0.6617, "step": 1028 }, { "epoch": 3.5482758620689654, "grad_norm": 4.286870956420898, "learning_rate": 4.24551724137931e-06, "loss": 0.6436, "step": 1029 }, { "epoch": 3.5517241379310347, "grad_norm": 1.55207097530365, "learning_rate": 4.249655172413793e-06, "loss": 0.6278, "step": 1030 }, { "epoch": 3.5551724137931036, "grad_norm": 1.5046641826629639, "learning_rate": 4.253793103448276e-06, "loss": 0.6371, "step": 1031 }, { "epoch": 3.5586206896551724, "grad_norm": 1.632129430770874, "learning_rate": 4.257931034482759e-06, "loss": 0.6468, "step": 1032 }, { "epoch": 3.5620689655172413, "grad_norm": 1.5645672082901, "learning_rate": 4.262068965517242e-06, "loss": 0.6194, "step": 1033 }, { "epoch": 3.56551724137931, "grad_norm": 2.0330729484558105, "learning_rate": 4.266206896551725e-06, "loss": 0.6526, "step": 1034 }, { "epoch": 3.5689655172413794, "grad_norm": 8.772598266601562, "learning_rate": 4.270344827586207e-06, "loss": 0.6742, "step": 1035 }, { "epoch": 3.5724137931034483, "grad_norm": 2.9715094566345215, "learning_rate": 4.2744827586206895e-06, "loss": 0.6867, "step": 1036 }, { "epoch": 3.575862068965517, "grad_norm": 2.398526668548584, "learning_rate": 4.278620689655172e-06, "loss": 0.6637, "step": 1037 }, { "epoch": 3.5793103448275865, "grad_norm": 2.3093485832214355, "learning_rate": 4.282758620689655e-06, "loss": 0.6888, "step": 1038 }, { "epoch": 3.5827586206896553, "grad_norm": 1.845329761505127, "learning_rate": 4.286896551724138e-06, "loss": 0.6448, "step": 1039 }, { "epoch": 3.586206896551724, "grad_norm": 3.1924471855163574, "learning_rate": 4.291034482758621e-06, "loss": 0.6686, "step": 1040 }, { "epoch": 3.589655172413793, "grad_norm": 1.8964985609054565, "learning_rate": 4.295172413793104e-06, "loss": 0.698, "step": 1041 }, { "epoch": 3.593103448275862, "grad_norm": 2.269728183746338, "learning_rate": 4.299310344827586e-06, "loss": 0.6481, "step": 1042 }, { "epoch": 3.596551724137931, "grad_norm": 2.3797082901000977, "learning_rate": 4.303448275862069e-06, "loss": 0.7219, "step": 1043 }, { "epoch": 3.6, "grad_norm": 2.3971407413482666, "learning_rate": 4.3075862068965515e-06, "loss": 0.8036, "step": 1044 }, { "epoch": 3.603448275862069, "grad_norm": 3.115861654281616, "learning_rate": 4.311724137931034e-06, "loss": 0.9035, "step": 1045 }, { "epoch": 3.606896551724138, "grad_norm": 1.8180443048477173, "learning_rate": 4.315862068965517e-06, "loss": 0.6769, "step": 1046 }, { "epoch": 3.610344827586207, "grad_norm": 1.1155203580856323, "learning_rate": 4.32e-06, "loss": 0.6245, "step": 1047 }, { "epoch": 3.613793103448276, "grad_norm": 1.3693504333496094, "learning_rate": 4.324137931034482e-06, "loss": 0.6423, "step": 1048 }, { "epoch": 3.617241379310345, "grad_norm": 1.3484151363372803, "learning_rate": 4.328275862068965e-06, "loss": 0.5764, "step": 1049 }, { "epoch": 3.6206896551724137, "grad_norm": 1.772029161453247, "learning_rate": 4.332413793103448e-06, "loss": 0.6502, "step": 1050 }, { "epoch": 3.6241379310344826, "grad_norm": 1.1836544275283813, "learning_rate": 4.336551724137932e-06, "loss": 0.5963, "step": 1051 }, { "epoch": 3.627586206896552, "grad_norm": 1.3410117626190186, "learning_rate": 4.3406896551724145e-06, "loss": 0.6375, "step": 1052 }, { "epoch": 3.6310344827586207, "grad_norm": 1.290114164352417, "learning_rate": 4.344827586206897e-06, "loss": 0.6312, "step": 1053 }, { "epoch": 3.6344827586206896, "grad_norm": 1.6923660039901733, "learning_rate": 4.348965517241379e-06, "loss": 0.6439, "step": 1054 }, { "epoch": 3.637931034482759, "grad_norm": 3.617198944091797, "learning_rate": 4.353103448275862e-06, "loss": 0.6352, "step": 1055 }, { "epoch": 3.6413793103448278, "grad_norm": 1.5459996461868286, "learning_rate": 4.357241379310345e-06, "loss": 0.6304, "step": 1056 }, { "epoch": 3.6448275862068966, "grad_norm": 2.242572069168091, "learning_rate": 4.361379310344828e-06, "loss": 0.6669, "step": 1057 }, { "epoch": 3.6482758620689655, "grad_norm": 1.5348623991012573, "learning_rate": 4.365517241379311e-06, "loss": 0.6126, "step": 1058 }, { "epoch": 3.6517241379310343, "grad_norm": 35.01888656616211, "learning_rate": 4.369655172413794e-06, "loss": 0.6444, "step": 1059 }, { "epoch": 3.655172413793103, "grad_norm": 1.42215895652771, "learning_rate": 4.3737931034482765e-06, "loss": 0.6857, "step": 1060 }, { "epoch": 3.6586206896551725, "grad_norm": 1.4843158721923828, "learning_rate": 4.3779310344827586e-06, "loss": 0.6291, "step": 1061 }, { "epoch": 3.6620689655172414, "grad_norm": 1.934261441230774, "learning_rate": 4.3820689655172414e-06, "loss": 0.6377, "step": 1062 }, { "epoch": 3.6655172413793102, "grad_norm": 2.852144956588745, "learning_rate": 4.386206896551724e-06, "loss": 0.6425, "step": 1063 }, { "epoch": 3.6689655172413795, "grad_norm": 2.211871862411499, "learning_rate": 4.390344827586207e-06, "loss": 0.7134, "step": 1064 }, { "epoch": 3.6724137931034484, "grad_norm": 1.8411730527877808, "learning_rate": 4.39448275862069e-06, "loss": 0.6541, "step": 1065 }, { "epoch": 3.6758620689655173, "grad_norm": 2.2286343574523926, "learning_rate": 4.398620689655173e-06, "loss": 0.6512, "step": 1066 }, { "epoch": 3.679310344827586, "grad_norm": 3.1533262729644775, "learning_rate": 4.402758620689655e-06, "loss": 0.675, "step": 1067 }, { "epoch": 3.682758620689655, "grad_norm": 2.6288537979125977, "learning_rate": 4.406896551724138e-06, "loss": 0.7328, "step": 1068 }, { "epoch": 3.6862068965517243, "grad_norm": 2.256941318511963, "learning_rate": 4.411034482758621e-06, "loss": 0.7262, "step": 1069 }, { "epoch": 3.689655172413793, "grad_norm": 3.7694270610809326, "learning_rate": 4.4151724137931035e-06, "loss": 0.901, "step": 1070 }, { "epoch": 3.693103448275862, "grad_norm": 1.5402870178222656, "learning_rate": 4.419310344827586e-06, "loss": 0.6791, "step": 1071 }, { "epoch": 3.696551724137931, "grad_norm": 1.2894163131713867, "learning_rate": 4.423448275862069e-06, "loss": 0.6159, "step": 1072 }, { "epoch": 3.7, "grad_norm": 1.5078057050704956, "learning_rate": 4.427586206896552e-06, "loss": 0.5976, "step": 1073 }, { "epoch": 3.703448275862069, "grad_norm": 1.3060811758041382, "learning_rate": 4.431724137931034e-06, "loss": 0.6065, "step": 1074 }, { "epoch": 3.706896551724138, "grad_norm": 1.5463155508041382, "learning_rate": 4.435862068965517e-06, "loss": 0.6086, "step": 1075 }, { "epoch": 3.7103448275862068, "grad_norm": 1.9046581983566284, "learning_rate": 4.44e-06, "loss": 0.6318, "step": 1076 }, { "epoch": 3.7137931034482756, "grad_norm": 1.3249549865722656, "learning_rate": 4.444137931034483e-06, "loss": 0.6262, "step": 1077 }, { "epoch": 3.717241379310345, "grad_norm": 1.72825026512146, "learning_rate": 4.4482758620689656e-06, "loss": 0.656, "step": 1078 }, { "epoch": 3.720689655172414, "grad_norm": 2.2947299480438232, "learning_rate": 4.4524137931034484e-06, "loss": 0.5889, "step": 1079 }, { "epoch": 3.7241379310344827, "grad_norm": 1.29935884475708, "learning_rate": 4.4565517241379305e-06, "loss": 0.6074, "step": 1080 }, { "epoch": 3.727586206896552, "grad_norm": 1.604335069656372, "learning_rate": 4.460689655172413e-06, "loss": 0.6217, "step": 1081 }, { "epoch": 3.731034482758621, "grad_norm": 1.3876477479934692, "learning_rate": 4.464827586206896e-06, "loss": 0.6446, "step": 1082 }, { "epoch": 3.7344827586206897, "grad_norm": 1.4027408361434937, "learning_rate": 4.468965517241379e-06, "loss": 0.5709, "step": 1083 }, { "epoch": 3.7379310344827585, "grad_norm": 1.5710816383361816, "learning_rate": 4.473103448275863e-06, "loss": 0.6315, "step": 1084 }, { "epoch": 3.7413793103448274, "grad_norm": 2.5128822326660156, "learning_rate": 4.477241379310346e-06, "loss": 0.6238, "step": 1085 }, { "epoch": 3.7448275862068967, "grad_norm": 2.531512498855591, "learning_rate": 4.481379310344828e-06, "loss": 0.656, "step": 1086 }, { "epoch": 3.7482758620689656, "grad_norm": 1.9680639505386353, "learning_rate": 4.4855172413793105e-06, "loss": 0.6169, "step": 1087 }, { "epoch": 3.7517241379310344, "grad_norm": 1.6793618202209473, "learning_rate": 4.489655172413793e-06, "loss": 0.6426, "step": 1088 }, { "epoch": 3.7551724137931033, "grad_norm": 2.2108283042907715, "learning_rate": 4.493793103448276e-06, "loss": 0.6582, "step": 1089 }, { "epoch": 3.7586206896551726, "grad_norm": 1.8628226518630981, "learning_rate": 4.497931034482759e-06, "loss": 0.6475, "step": 1090 }, { "epoch": 3.7620689655172415, "grad_norm": 2.0412116050720215, "learning_rate": 4.502068965517242e-06, "loss": 0.6595, "step": 1091 }, { "epoch": 3.7655172413793103, "grad_norm": 2.0809123516082764, "learning_rate": 4.506206896551725e-06, "loss": 0.6565, "step": 1092 }, { "epoch": 3.768965517241379, "grad_norm": 2.1092329025268555, "learning_rate": 4.510344827586207e-06, "loss": 0.7286, "step": 1093 }, { "epoch": 3.772413793103448, "grad_norm": 3.0890376567840576, "learning_rate": 4.51448275862069e-06, "loss": 0.7942, "step": 1094 }, { "epoch": 3.7758620689655173, "grad_norm": 3.4979896545410156, "learning_rate": 4.518620689655173e-06, "loss": 0.9101, "step": 1095 }, { "epoch": 3.779310344827586, "grad_norm": 2.018592596054077, "learning_rate": 4.5227586206896554e-06, "loss": 0.6498, "step": 1096 }, { "epoch": 3.782758620689655, "grad_norm": 1.0781456232070923, "learning_rate": 4.526896551724138e-06, "loss": 0.5938, "step": 1097 }, { "epoch": 3.7862068965517244, "grad_norm": 1.694867491722107, "learning_rate": 4.531034482758621e-06, "loss": 0.6109, "step": 1098 }, { "epoch": 3.7896551724137932, "grad_norm": 4.202380180358887, "learning_rate": 4.535172413793103e-06, "loss": 0.6401, "step": 1099 }, { "epoch": 3.793103448275862, "grad_norm": 1.2893197536468506, "learning_rate": 4.539310344827586e-06, "loss": 0.6389, "step": 1100 }, { "epoch": 3.796551724137931, "grad_norm": 1.3038984537124634, "learning_rate": 4.543448275862069e-06, "loss": 0.5643, "step": 1101 }, { "epoch": 3.8, "grad_norm": 1.3572940826416016, "learning_rate": 4.547586206896552e-06, "loss": 0.6094, "step": 1102 }, { "epoch": 3.803448275862069, "grad_norm": 1.5306403636932373, "learning_rate": 4.551724137931035e-06, "loss": 0.6198, "step": 1103 }, { "epoch": 3.806896551724138, "grad_norm": 1.9523472785949707, "learning_rate": 4.5558620689655175e-06, "loss": 0.5913, "step": 1104 }, { "epoch": 3.810344827586207, "grad_norm": 1.6251890659332275, "learning_rate": 4.56e-06, "loss": 0.5946, "step": 1105 }, { "epoch": 3.8137931034482757, "grad_norm": 1.3286422491073608, "learning_rate": 4.564137931034482e-06, "loss": 0.6435, "step": 1106 }, { "epoch": 3.817241379310345, "grad_norm": 1.5145118236541748, "learning_rate": 4.568275862068965e-06, "loss": 0.5725, "step": 1107 }, { "epoch": 3.820689655172414, "grad_norm": 2.0372531414031982, "learning_rate": 4.572413793103448e-06, "loss": 0.6137, "step": 1108 }, { "epoch": 3.8241379310344827, "grad_norm": 1.3124699592590332, "learning_rate": 4.576551724137931e-06, "loss": 0.5798, "step": 1109 }, { "epoch": 3.8275862068965516, "grad_norm": 2.357947826385498, "learning_rate": 4.580689655172414e-06, "loss": 0.6191, "step": 1110 }, { "epoch": 3.8310344827586205, "grad_norm": 1.81540846824646, "learning_rate": 4.584827586206897e-06, "loss": 0.6244, "step": 1111 }, { "epoch": 3.8344827586206898, "grad_norm": 1.6689836978912354, "learning_rate": 4.588965517241379e-06, "loss": 0.6396, "step": 1112 }, { "epoch": 3.8379310344827586, "grad_norm": 1.5100585222244263, "learning_rate": 4.593103448275862e-06, "loss": 0.618, "step": 1113 }, { "epoch": 3.8413793103448275, "grad_norm": 1.3862895965576172, "learning_rate": 4.5972413793103445e-06, "loss": 0.6121, "step": 1114 }, { "epoch": 3.844827586206897, "grad_norm": 2.071887969970703, "learning_rate": 4.601379310344827e-06, "loss": 0.6045, "step": 1115 }, { "epoch": 3.8482758620689657, "grad_norm": 2.2448103427886963, "learning_rate": 4.60551724137931e-06, "loss": 0.6942, "step": 1116 }, { "epoch": 3.8517241379310345, "grad_norm": 1.7476688623428345, "learning_rate": 4.609655172413793e-06, "loss": 0.6795, "step": 1117 }, { "epoch": 3.8551724137931034, "grad_norm": 2.868575096130371, "learning_rate": 4.613793103448277e-06, "loss": 0.6727, "step": 1118 }, { "epoch": 3.8586206896551722, "grad_norm": 2.935899257659912, "learning_rate": 4.617931034482759e-06, "loss": 0.7449, "step": 1119 }, { "epoch": 3.862068965517241, "grad_norm": 2.9837799072265625, "learning_rate": 4.622068965517242e-06, "loss": 0.8883, "step": 1120 }, { "epoch": 3.8655172413793104, "grad_norm": 2.5879440307617188, "learning_rate": 4.6262068965517245e-06, "loss": 0.6738, "step": 1121 }, { "epoch": 3.8689655172413793, "grad_norm": 1.8153220415115356, "learning_rate": 4.630344827586207e-06, "loss": 0.6326, "step": 1122 }, { "epoch": 3.872413793103448, "grad_norm": 1.4666874408721924, "learning_rate": 4.63448275862069e-06, "loss": 0.6098, "step": 1123 }, { "epoch": 3.8758620689655174, "grad_norm": 2.201871156692505, "learning_rate": 4.638620689655173e-06, "loss": 0.6172, "step": 1124 }, { "epoch": 3.8793103448275863, "grad_norm": 1.9830551147460938, "learning_rate": 4.642758620689655e-06, "loss": 0.5968, "step": 1125 }, { "epoch": 3.882758620689655, "grad_norm": 1.7245794534683228, "learning_rate": 4.646896551724138e-06, "loss": 0.5959, "step": 1126 }, { "epoch": 3.886206896551724, "grad_norm": 1.7532061338424683, "learning_rate": 4.651034482758621e-06, "loss": 0.5847, "step": 1127 }, { "epoch": 3.889655172413793, "grad_norm": 1.3248069286346436, "learning_rate": 4.655172413793104e-06, "loss": 0.5866, "step": 1128 }, { "epoch": 3.893103448275862, "grad_norm": 1.5570735931396484, "learning_rate": 4.659310344827587e-06, "loss": 0.5903, "step": 1129 }, { "epoch": 3.896551724137931, "grad_norm": 1.0868003368377686, "learning_rate": 4.6634482758620695e-06, "loss": 0.5752, "step": 1130 }, { "epoch": 3.9, "grad_norm": 1.4609012603759766, "learning_rate": 4.6675862068965515e-06, "loss": 0.5643, "step": 1131 }, { "epoch": 3.903448275862069, "grad_norm": 1.5023943185806274, "learning_rate": 4.671724137931034e-06, "loss": 0.6268, "step": 1132 }, { "epoch": 3.906896551724138, "grad_norm": 1.7904887199401855, "learning_rate": 4.675862068965517e-06, "loss": 0.5879, "step": 1133 }, { "epoch": 3.910344827586207, "grad_norm": 1.8266535997390747, "learning_rate": 4.68e-06, "loss": 0.6178, "step": 1134 }, { "epoch": 3.913793103448276, "grad_norm": 2.575582265853882, "learning_rate": 4.684137931034483e-06, "loss": 0.6452, "step": 1135 }, { "epoch": 3.9172413793103447, "grad_norm": 1.6434073448181152, "learning_rate": 4.688275862068966e-06, "loss": 0.5994, "step": 1136 }, { "epoch": 3.9206896551724135, "grad_norm": 1.5129584074020386, "learning_rate": 4.692413793103449e-06, "loss": 0.6273, "step": 1137 }, { "epoch": 3.924137931034483, "grad_norm": 2.60565185546875, "learning_rate": 4.696551724137931e-06, "loss": 0.6152, "step": 1138 }, { "epoch": 3.9275862068965517, "grad_norm": 2.479519844055176, "learning_rate": 4.7006896551724136e-06, "loss": 0.6056, "step": 1139 }, { "epoch": 3.9310344827586206, "grad_norm": 1.5183517932891846, "learning_rate": 4.7048275862068964e-06, "loss": 0.6332, "step": 1140 }, { "epoch": 3.93448275862069, "grad_norm": 1.5930291414260864, "learning_rate": 4.708965517241379e-06, "loss": 0.5891, "step": 1141 }, { "epoch": 3.9379310344827587, "grad_norm": 1.624651312828064, "learning_rate": 4.713103448275862e-06, "loss": 0.6745, "step": 1142 }, { "epoch": 3.9413793103448276, "grad_norm": 2.5409231185913086, "learning_rate": 4.717241379310345e-06, "loss": 0.6552, "step": 1143 }, { "epoch": 3.9448275862068964, "grad_norm": 2.9037070274353027, "learning_rate": 4.721379310344827e-06, "loss": 0.7086, "step": 1144 }, { "epoch": 3.9482758620689653, "grad_norm": 3.8177895545959473, "learning_rate": 4.72551724137931e-06, "loss": 0.8904, "step": 1145 }, { "epoch": 3.9517241379310346, "grad_norm": 1.3780477046966553, "learning_rate": 4.729655172413793e-06, "loss": 0.6326, "step": 1146 }, { "epoch": 3.9551724137931035, "grad_norm": 1.2174111604690552, "learning_rate": 4.733793103448276e-06, "loss": 0.607, "step": 1147 }, { "epoch": 3.9586206896551723, "grad_norm": 1.1693755388259888, "learning_rate": 4.7379310344827585e-06, "loss": 0.5836, "step": 1148 }, { "epoch": 3.9620689655172416, "grad_norm": 1.1335092782974243, "learning_rate": 4.742068965517241e-06, "loss": 0.5857, "step": 1149 }, { "epoch": 3.9655172413793105, "grad_norm": 1.7654539346694946, "learning_rate": 4.746206896551724e-06, "loss": 0.6003, "step": 1150 }, { "epoch": 3.9689655172413794, "grad_norm": 2.290055274963379, "learning_rate": 4.750344827586207e-06, "loss": 0.587, "step": 1151 }, { "epoch": 3.972413793103448, "grad_norm": 1.348717451095581, "learning_rate": 4.75448275862069e-06, "loss": 0.5963, "step": 1152 }, { "epoch": 3.975862068965517, "grad_norm": 1.463788628578186, "learning_rate": 4.758620689655173e-06, "loss": 0.6095, "step": 1153 }, { "epoch": 3.979310344827586, "grad_norm": 2.1479790210723877, "learning_rate": 4.762758620689656e-06, "loss": 0.608, "step": 1154 }, { "epoch": 3.9827586206896552, "grad_norm": 1.2101342678070068, "learning_rate": 4.7668965517241386e-06, "loss": 0.5931, "step": 1155 }, { "epoch": 3.986206896551724, "grad_norm": 1.6240313053131104, "learning_rate": 4.771034482758621e-06, "loss": 0.6115, "step": 1156 }, { "epoch": 3.989655172413793, "grad_norm": 1.6252233982086182, "learning_rate": 4.7751724137931034e-06, "loss": 0.6669, "step": 1157 }, { "epoch": 3.9931034482758623, "grad_norm": 1.518173098564148, "learning_rate": 4.779310344827586e-06, "loss": 0.6765, "step": 1158 }, { "epoch": 3.996551724137931, "grad_norm": 2.4850916862487793, "learning_rate": 4.783448275862069e-06, "loss": 0.6532, "step": 1159 }, { "epoch": 4.0, "grad_norm": 2.1533780097961426, "learning_rate": 4.787586206896552e-06, "loss": 0.8486, "step": 1160 }, { "epoch": 4.003448275862069, "grad_norm": 1.3117220401763916, "learning_rate": 4.791724137931035e-06, "loss": 0.5982, "step": 1161 }, { "epoch": 4.006896551724138, "grad_norm": 1.1078472137451172, "learning_rate": 4.795862068965518e-06, "loss": 0.5847, "step": 1162 }, { "epoch": 4.010344827586207, "grad_norm": 1.2918200492858887, "learning_rate": 4.800000000000001e-06, "loss": 0.6199, "step": 1163 }, { "epoch": 4.0137931034482754, "grad_norm": 1.1282401084899902, "learning_rate": 4.804137931034483e-06, "loss": 0.5496, "step": 1164 }, { "epoch": 4.017241379310345, "grad_norm": 1.1605031490325928, "learning_rate": 4.8082758620689655e-06, "loss": 0.5902, "step": 1165 }, { "epoch": 4.020689655172414, "grad_norm": 1.283850908279419, "learning_rate": 4.812413793103448e-06, "loss": 0.5632, "step": 1166 }, { "epoch": 4.024137931034483, "grad_norm": 1.143991231918335, "learning_rate": 4.816551724137931e-06, "loss": 0.6141, "step": 1167 }, { "epoch": 4.027586206896552, "grad_norm": 2.252002239227295, "learning_rate": 4.820689655172414e-06, "loss": 0.5918, "step": 1168 }, { "epoch": 4.031034482758621, "grad_norm": 1.1432228088378906, "learning_rate": 4.824827586206897e-06, "loss": 0.5409, "step": 1169 }, { "epoch": 4.0344827586206895, "grad_norm": 1.2917847633361816, "learning_rate": 4.828965517241379e-06, "loss": 0.5421, "step": 1170 }, { "epoch": 4.037931034482758, "grad_norm": 1.2971941232681274, "learning_rate": 4.833103448275862e-06, "loss": 0.5564, "step": 1171 }, { "epoch": 4.041379310344827, "grad_norm": 1.2099064588546753, "learning_rate": 4.837241379310345e-06, "loss": 0.5678, "step": 1172 }, { "epoch": 4.044827586206897, "grad_norm": 1.8231791257858276, "learning_rate": 4.841379310344828e-06, "loss": 0.5928, "step": 1173 }, { "epoch": 4.048275862068966, "grad_norm": 1.4682183265686035, "learning_rate": 4.8455172413793105e-06, "loss": 0.5789, "step": 1174 }, { "epoch": 4.051724137931035, "grad_norm": 1.3946151733398438, "learning_rate": 4.849655172413793e-06, "loss": 0.6028, "step": 1175 }, { "epoch": 4.055172413793104, "grad_norm": 1.7045047283172607, "learning_rate": 4.853793103448276e-06, "loss": 0.5908, "step": 1176 }, { "epoch": 4.058620689655172, "grad_norm": 2.165504217147827, "learning_rate": 4.857931034482758e-06, "loss": 0.5881, "step": 1177 }, { "epoch": 4.062068965517241, "grad_norm": 1.4425709247589111, "learning_rate": 4.862068965517241e-06, "loss": 0.5942, "step": 1178 }, { "epoch": 4.06551724137931, "grad_norm": 1.544984221458435, "learning_rate": 4.866206896551724e-06, "loss": 0.6368, "step": 1179 }, { "epoch": 4.068965517241379, "grad_norm": 1.5750399827957153, "learning_rate": 4.870344827586207e-06, "loss": 0.5845, "step": 1180 }, { "epoch": 4.072413793103448, "grad_norm": 1.6177928447723389, "learning_rate": 4.87448275862069e-06, "loss": 0.6224, "step": 1181 }, { "epoch": 4.075862068965518, "grad_norm": 4.958315849304199, "learning_rate": 4.8786206896551725e-06, "loss": 0.6177, "step": 1182 }, { "epoch": 4.0793103448275865, "grad_norm": 2.726759910583496, "learning_rate": 4.8827586206896545e-06, "loss": 0.679, "step": 1183 }, { "epoch": 4.082758620689655, "grad_norm": 6.679110050201416, "learning_rate": 4.886896551724138e-06, "loss": 0.722, "step": 1184 }, { "epoch": 4.086206896551724, "grad_norm": 2.44397234916687, "learning_rate": 4.891034482758621e-06, "loss": 0.9011, "step": 1185 }, { "epoch": 4.089655172413793, "grad_norm": 1.3386980295181274, "learning_rate": 4.895172413793104e-06, "loss": 0.5879, "step": 1186 }, { "epoch": 4.093103448275862, "grad_norm": 1.2071493864059448, "learning_rate": 4.899310344827587e-06, "loss": 0.5876, "step": 1187 }, { "epoch": 4.096551724137931, "grad_norm": 1.1650285720825195, "learning_rate": 4.90344827586207e-06, "loss": 0.6049, "step": 1188 }, { "epoch": 4.1, "grad_norm": 1.3527129888534546, "learning_rate": 4.907586206896552e-06, "loss": 0.5559, "step": 1189 }, { "epoch": 4.103448275862069, "grad_norm": 1.4758493900299072, "learning_rate": 4.911724137931035e-06, "loss": 0.5573, "step": 1190 }, { "epoch": 4.106896551724138, "grad_norm": 1.2485966682434082, "learning_rate": 4.9158620689655175e-06, "loss": 0.5726, "step": 1191 }, { "epoch": 4.110344827586207, "grad_norm": 1.4025249481201172, "learning_rate": 4.92e-06, "loss": 0.5723, "step": 1192 }, { "epoch": 4.113793103448276, "grad_norm": 1.3759719133377075, "learning_rate": 4.924137931034483e-06, "loss": 0.5949, "step": 1193 }, { "epoch": 4.117241379310345, "grad_norm": 1.8814278841018677, "learning_rate": 4.928275862068966e-06, "loss": 0.5751, "step": 1194 }, { "epoch": 4.120689655172414, "grad_norm": 1.9528604745864868, "learning_rate": 4.932413793103449e-06, "loss": 0.5444, "step": 1195 }, { "epoch": 4.124137931034483, "grad_norm": 1.6784486770629883, "learning_rate": 4.936551724137931e-06, "loss": 0.6015, "step": 1196 }, { "epoch": 4.127586206896551, "grad_norm": 1.2964909076690674, "learning_rate": 4.940689655172414e-06, "loss": 0.648, "step": 1197 }, { "epoch": 4.13103448275862, "grad_norm": 1.2219972610473633, "learning_rate": 4.944827586206897e-06, "loss": 0.5382, "step": 1198 }, { "epoch": 4.13448275862069, "grad_norm": 1.6322321891784668, "learning_rate": 4.9489655172413795e-06, "loss": 0.5713, "step": 1199 }, { "epoch": 4.137931034482759, "grad_norm": 1.3404291868209839, "learning_rate": 4.953103448275862e-06, "loss": 0.5671, "step": 1200 }, { "epoch": 4.141379310344828, "grad_norm": 1.495418667793274, "learning_rate": 4.957241379310345e-06, "loss": 0.5999, "step": 1201 }, { "epoch": 4.144827586206897, "grad_norm": 2.0218019485473633, "learning_rate": 4.961379310344827e-06, "loss": 0.562, "step": 1202 }, { "epoch": 4.1482758620689655, "grad_norm": 1.5573328733444214, "learning_rate": 4.96551724137931e-06, "loss": 0.6012, "step": 1203 }, { "epoch": 4.151724137931034, "grad_norm": 3.410614490509033, "learning_rate": 4.969655172413793e-06, "loss": 0.6412, "step": 1204 }, { "epoch": 4.155172413793103, "grad_norm": 2.1132445335388184, "learning_rate": 4.973793103448276e-06, "loss": 0.6065, "step": 1205 }, { "epoch": 4.158620689655172, "grad_norm": 1.7335243225097656, "learning_rate": 4.977931034482759e-06, "loss": 0.6166, "step": 1206 }, { "epoch": 4.162068965517241, "grad_norm": 1.8110710382461548, "learning_rate": 4.982068965517242e-06, "loss": 0.6227, "step": 1207 }, { "epoch": 4.165517241379311, "grad_norm": 2.026869773864746, "learning_rate": 4.9862068965517245e-06, "loss": 0.6187, "step": 1208 }, { "epoch": 4.1689655172413795, "grad_norm": 2.977159261703491, "learning_rate": 4.9903448275862065e-06, "loss": 0.6824, "step": 1209 }, { "epoch": 4.172413793103448, "grad_norm": 2.556142807006836, "learning_rate": 4.994482758620689e-06, "loss": 0.8604, "step": 1210 }, { "epoch": 4.175862068965517, "grad_norm": 1.1832791566848755, "learning_rate": 4.998620689655172e-06, "loss": 0.5994, "step": 1211 }, { "epoch": 4.179310344827586, "grad_norm": 1.305628776550293, "learning_rate": 5.002758620689655e-06, "loss": 0.6121, "step": 1212 }, { "epoch": 4.182758620689655, "grad_norm": 0.9766099452972412, "learning_rate": 5.006896551724138e-06, "loss": 0.5252, "step": 1213 }, { "epoch": 4.186206896551724, "grad_norm": 0.9336771368980408, "learning_rate": 5.011034482758621e-06, "loss": 0.5551, "step": 1214 }, { "epoch": 4.189655172413793, "grad_norm": 0.914784848690033, "learning_rate": 5.015172413793103e-06, "loss": 0.5198, "step": 1215 }, { "epoch": 4.1931034482758625, "grad_norm": 1.3804891109466553, "learning_rate": 5.019310344827586e-06, "loss": 0.5353, "step": 1216 }, { "epoch": 4.196551724137931, "grad_norm": 1.8472063541412354, "learning_rate": 5.023448275862069e-06, "loss": 0.544, "step": 1217 }, { "epoch": 4.2, "grad_norm": 1.7666079998016357, "learning_rate": 5.027586206896552e-06, "loss": 0.5347, "step": 1218 }, { "epoch": 4.203448275862069, "grad_norm": 1.2035435438156128, "learning_rate": 5.031724137931035e-06, "loss": 0.5299, "step": 1219 }, { "epoch": 4.206896551724138, "grad_norm": 1.3764044046401978, "learning_rate": 5.035862068965518e-06, "loss": 0.5695, "step": 1220 }, { "epoch": 4.210344827586207, "grad_norm": 1.5244250297546387, "learning_rate": 5.04e-06, "loss": 0.5975, "step": 1221 }, { "epoch": 4.213793103448276, "grad_norm": 1.6553735733032227, "learning_rate": 5.044137931034483e-06, "loss": 0.5696, "step": 1222 }, { "epoch": 4.2172413793103445, "grad_norm": 1.398626685142517, "learning_rate": 5.048275862068966e-06, "loss": 0.5744, "step": 1223 }, { "epoch": 4.220689655172414, "grad_norm": 1.2012838125228882, "learning_rate": 5.052413793103449e-06, "loss": 0.5051, "step": 1224 }, { "epoch": 4.224137931034483, "grad_norm": 1.306075930595398, "learning_rate": 5.0565517241379315e-06, "loss": 0.5562, "step": 1225 }, { "epoch": 4.227586206896552, "grad_norm": 2.015580177307129, "learning_rate": 5.060689655172414e-06, "loss": 0.5515, "step": 1226 }, { "epoch": 4.231034482758621, "grad_norm": 1.4236047267913818, "learning_rate": 5.064827586206897e-06, "loss": 0.5804, "step": 1227 }, { "epoch": 4.23448275862069, "grad_norm": 1.5213685035705566, "learning_rate": 5.068965517241379e-06, "loss": 0.5445, "step": 1228 }, { "epoch": 4.2379310344827585, "grad_norm": 1.6793516874313354, "learning_rate": 5.073103448275862e-06, "loss": 0.6083, "step": 1229 }, { "epoch": 4.241379310344827, "grad_norm": 1.529329776763916, "learning_rate": 5.077241379310345e-06, "loss": 0.6286, "step": 1230 }, { "epoch": 4.244827586206896, "grad_norm": 1.9705142974853516, "learning_rate": 5.081379310344828e-06, "loss": 0.6378, "step": 1231 }, { "epoch": 4.248275862068965, "grad_norm": 2.0714190006256104, "learning_rate": 5.085517241379311e-06, "loss": 0.6222, "step": 1232 }, { "epoch": 4.251724137931035, "grad_norm": 2.0049710273742676, "learning_rate": 5.0896551724137936e-06, "loss": 0.678, "step": 1233 }, { "epoch": 4.255172413793104, "grad_norm": 2.8552935123443604, "learning_rate": 5.093793103448276e-06, "loss": 0.7367, "step": 1234 }, { "epoch": 4.258620689655173, "grad_norm": 2.6540281772613525, "learning_rate": 5.0979310344827584e-06, "loss": 0.7707, "step": 1235 }, { "epoch": 4.2620689655172415, "grad_norm": 1.5443202257156372, "learning_rate": 5.102068965517241e-06, "loss": 0.5748, "step": 1236 }, { "epoch": 4.26551724137931, "grad_norm": 0.9827457666397095, "learning_rate": 5.106206896551724e-06, "loss": 0.5356, "step": 1237 }, { "epoch": 4.268965517241379, "grad_norm": 1.3710821866989136, "learning_rate": 5.110344827586207e-06, "loss": 0.5695, "step": 1238 }, { "epoch": 4.272413793103448, "grad_norm": 1.0474283695220947, "learning_rate": 5.11448275862069e-06, "loss": 0.5459, "step": 1239 }, { "epoch": 4.275862068965517, "grad_norm": 1.2811849117279053, "learning_rate": 5.118620689655173e-06, "loss": 0.5368, "step": 1240 }, { "epoch": 4.279310344827586, "grad_norm": 1.436128854751587, "learning_rate": 5.122758620689655e-06, "loss": 0.5345, "step": 1241 }, { "epoch": 4.2827586206896555, "grad_norm": 1.110337495803833, "learning_rate": 5.126896551724138e-06, "loss": 0.5537, "step": 1242 }, { "epoch": 4.286206896551724, "grad_norm": 1.3263413906097412, "learning_rate": 5.1310344827586205e-06, "loss": 0.5808, "step": 1243 }, { "epoch": 4.289655172413793, "grad_norm": 1.4592989683151245, "learning_rate": 5.135172413793103e-06, "loss": 0.5394, "step": 1244 }, { "epoch": 4.293103448275862, "grad_norm": 1.1670253276824951, "learning_rate": 5.139310344827586e-06, "loss": 0.552, "step": 1245 }, { "epoch": 4.296551724137931, "grad_norm": 1.1619880199432373, "learning_rate": 5.143448275862069e-06, "loss": 0.5762, "step": 1246 }, { "epoch": 4.3, "grad_norm": 1.4918776750564575, "learning_rate": 5.147586206896551e-06, "loss": 0.5457, "step": 1247 }, { "epoch": 4.303448275862069, "grad_norm": 1.3101987838745117, "learning_rate": 5.151724137931034e-06, "loss": 0.5011, "step": 1248 }, { "epoch": 4.3068965517241375, "grad_norm": 2.336007595062256, "learning_rate": 5.155862068965517e-06, "loss": 0.5854, "step": 1249 }, { "epoch": 4.310344827586207, "grad_norm": 2.16977596282959, "learning_rate": 5.16e-06, "loss": 0.608, "step": 1250 }, { "epoch": 4.313793103448276, "grad_norm": 1.3469809293746948, "learning_rate": 5.1641379310344834e-06, "loss": 0.5458, "step": 1251 }, { "epoch": 4.317241379310345, "grad_norm": 1.4336944818496704, "learning_rate": 5.168275862068966e-06, "loss": 0.5662, "step": 1252 }, { "epoch": 4.320689655172414, "grad_norm": 1.5361557006835938, "learning_rate": 5.172413793103449e-06, "loss": 0.5546, "step": 1253 }, { "epoch": 4.324137931034483, "grad_norm": 1.3224831819534302, "learning_rate": 5.176551724137931e-06, "loss": 0.5727, "step": 1254 }, { "epoch": 4.327586206896552, "grad_norm": 1.4064258337020874, "learning_rate": 5.180689655172414e-06, "loss": 0.5856, "step": 1255 }, { "epoch": 4.3310344827586205, "grad_norm": 1.718428611755371, "learning_rate": 5.184827586206897e-06, "loss": 0.5599, "step": 1256 }, { "epoch": 4.334482758620689, "grad_norm": 1.4680752754211426, "learning_rate": 5.18896551724138e-06, "loss": 0.5978, "step": 1257 }, { "epoch": 4.337931034482759, "grad_norm": 1.8881475925445557, "learning_rate": 5.193103448275863e-06, "loss": 0.6231, "step": 1258 }, { "epoch": 4.341379310344828, "grad_norm": 2.1881754398345947, "learning_rate": 5.1972413793103455e-06, "loss": 0.6853, "step": 1259 }, { "epoch": 4.344827586206897, "grad_norm": 2.7713582515716553, "learning_rate": 5.2013793103448275e-06, "loss": 0.7857, "step": 1260 }, { "epoch": 4.348275862068966, "grad_norm": 2.3919708728790283, "learning_rate": 5.20551724137931e-06, "loss": 0.6004, "step": 1261 }, { "epoch": 4.3517241379310345, "grad_norm": 1.1405091285705566, "learning_rate": 5.209655172413793e-06, "loss": 0.526, "step": 1262 }, { "epoch": 4.355172413793103, "grad_norm": 1.2431750297546387, "learning_rate": 5.213793103448276e-06, "loss": 0.5436, "step": 1263 }, { "epoch": 4.358620689655172, "grad_norm": 1.2658376693725586, "learning_rate": 5.217931034482759e-06, "loss": 0.5444, "step": 1264 }, { "epoch": 4.362068965517241, "grad_norm": 1.4762036800384521, "learning_rate": 5.222068965517242e-06, "loss": 0.5368, "step": 1265 }, { "epoch": 4.36551724137931, "grad_norm": 1.204459309577942, "learning_rate": 5.226206896551724e-06, "loss": 0.5596, "step": 1266 }, { "epoch": 4.36896551724138, "grad_norm": 1.3317537307739258, "learning_rate": 5.230344827586207e-06, "loss": 0.5246, "step": 1267 }, { "epoch": 4.372413793103449, "grad_norm": 2.01690411567688, "learning_rate": 5.23448275862069e-06, "loss": 0.6252, "step": 1268 }, { "epoch": 4.375862068965517, "grad_norm": 2.94779372215271, "learning_rate": 5.2386206896551725e-06, "loss": 0.5102, "step": 1269 }, { "epoch": 4.379310344827586, "grad_norm": 1.3195956945419312, "learning_rate": 5.242758620689655e-06, "loss": 0.5353, "step": 1270 }, { "epoch": 4.382758620689655, "grad_norm": 1.5097264051437378, "learning_rate": 5.246896551724138e-06, "loss": 0.5207, "step": 1271 }, { "epoch": 4.386206896551724, "grad_norm": 1.1743531227111816, "learning_rate": 5.251034482758621e-06, "loss": 0.5614, "step": 1272 }, { "epoch": 4.389655172413793, "grad_norm": 2.7347331047058105, "learning_rate": 5.255172413793103e-06, "loss": 0.568, "step": 1273 }, { "epoch": 4.393103448275862, "grad_norm": 1.3468279838562012, "learning_rate": 5.259310344827586e-06, "loss": 0.5831, "step": 1274 }, { "epoch": 4.396551724137931, "grad_norm": 1.2977228164672852, "learning_rate": 5.263448275862069e-06, "loss": 0.5669, "step": 1275 }, { "epoch": 4.4, "grad_norm": 1.7240349054336548, "learning_rate": 5.267586206896552e-06, "loss": 0.5533, "step": 1276 }, { "epoch": 4.403448275862069, "grad_norm": 1.2062166929244995, "learning_rate": 5.2717241379310345e-06, "loss": 0.5725, "step": 1277 }, { "epoch": 4.406896551724138, "grad_norm": 1.3095430135726929, "learning_rate": 5.275862068965517e-06, "loss": 0.586, "step": 1278 }, { "epoch": 4.410344827586207, "grad_norm": 1.322229266166687, "learning_rate": 5.279999999999999e-06, "loss": 0.5445, "step": 1279 }, { "epoch": 4.413793103448276, "grad_norm": 1.8216618299484253, "learning_rate": 5.284137931034482e-06, "loss": 0.5937, "step": 1280 }, { "epoch": 4.417241379310345, "grad_norm": 1.5431632995605469, "learning_rate": 5.288275862068965e-06, "loss": 0.5862, "step": 1281 }, { "epoch": 4.4206896551724135, "grad_norm": 2.0508415699005127, "learning_rate": 5.292413793103448e-06, "loss": 0.645, "step": 1282 }, { "epoch": 4.424137931034482, "grad_norm": 1.6634420156478882, "learning_rate": 5.296551724137931e-06, "loss": 0.612, "step": 1283 }, { "epoch": 4.427586206896552, "grad_norm": 2.029219150543213, "learning_rate": 5.300689655172415e-06, "loss": 0.6383, "step": 1284 }, { "epoch": 4.431034482758621, "grad_norm": 2.734039306640625, "learning_rate": 5.3048275862068975e-06, "loss": 0.8233, "step": 1285 }, { "epoch": 4.43448275862069, "grad_norm": 1.3330830335617065, "learning_rate": 5.3089655172413795e-06, "loss": 0.6281, "step": 1286 }, { "epoch": 4.437931034482759, "grad_norm": 2.1332008838653564, "learning_rate": 5.313103448275862e-06, "loss": 0.5617, "step": 1287 }, { "epoch": 4.441379310344828, "grad_norm": 1.2769361734390259, "learning_rate": 5.317241379310345e-06, "loss": 0.529, "step": 1288 }, { "epoch": 4.444827586206896, "grad_norm": 1.2750800848007202, "learning_rate": 5.321379310344828e-06, "loss": 0.5502, "step": 1289 }, { "epoch": 4.448275862068965, "grad_norm": 1.5370765924453735, "learning_rate": 5.325517241379311e-06, "loss": 0.5352, "step": 1290 }, { "epoch": 4.451724137931034, "grad_norm": 1.4245456457138062, "learning_rate": 5.329655172413794e-06, "loss": 0.5185, "step": 1291 }, { "epoch": 4.455172413793104, "grad_norm": 0.9815402626991272, "learning_rate": 5.333793103448276e-06, "loss": 0.5099, "step": 1292 }, { "epoch": 4.458620689655173, "grad_norm": 1.8379625082015991, "learning_rate": 5.337931034482759e-06, "loss": 0.5646, "step": 1293 }, { "epoch": 4.462068965517242, "grad_norm": 1.5334964990615845, "learning_rate": 5.3420689655172416e-06, "loss": 0.5656, "step": 1294 }, { "epoch": 4.4655172413793105, "grad_norm": 1.2997410297393799, "learning_rate": 5.346206896551724e-06, "loss": 0.5128, "step": 1295 }, { "epoch": 4.468965517241379, "grad_norm": 1.260384440422058, "learning_rate": 5.350344827586207e-06, "loss": 0.5271, "step": 1296 }, { "epoch": 4.472413793103448, "grad_norm": 2.0812442302703857, "learning_rate": 5.35448275862069e-06, "loss": 0.5393, "step": 1297 }, { "epoch": 4.475862068965517, "grad_norm": 1.9477308988571167, "learning_rate": 5.358620689655173e-06, "loss": 0.511, "step": 1298 }, { "epoch": 4.479310344827586, "grad_norm": 1.5073001384735107, "learning_rate": 5.362758620689655e-06, "loss": 0.5435, "step": 1299 }, { "epoch": 4.482758620689655, "grad_norm": 1.5741318464279175, "learning_rate": 5.366896551724138e-06, "loss": 0.5636, "step": 1300 }, { "epoch": 4.4862068965517246, "grad_norm": 1.5509626865386963, "learning_rate": 5.371034482758621e-06, "loss": 0.5327, "step": 1301 }, { "epoch": 4.489655172413793, "grad_norm": 1.1373071670532227, "learning_rate": 5.375172413793104e-06, "loss": 0.5423, "step": 1302 }, { "epoch": 4.493103448275862, "grad_norm": 1.4976037740707397, "learning_rate": 5.3793103448275865e-06, "loss": 0.5756, "step": 1303 }, { "epoch": 4.496551724137931, "grad_norm": 1.7273507118225098, "learning_rate": 5.383448275862069e-06, "loss": 0.5434, "step": 1304 }, { "epoch": 4.5, "grad_norm": 2.712665557861328, "learning_rate": 5.387586206896551e-06, "loss": 0.5658, "step": 1305 }, { "epoch": 4.503448275862069, "grad_norm": 2.1269125938415527, "learning_rate": 5.391724137931034e-06, "loss": 0.5697, "step": 1306 }, { "epoch": 4.506896551724138, "grad_norm": 3.677243709564209, "learning_rate": 5.395862068965517e-06, "loss": 0.5503, "step": 1307 }, { "epoch": 4.510344827586207, "grad_norm": 3.599456787109375, "learning_rate": 5.4e-06, "loss": 0.5993, "step": 1308 }, { "epoch": 4.5137931034482754, "grad_norm": 2.2663350105285645, "learning_rate": 5.404137931034483e-06, "loss": 0.5825, "step": 1309 }, { "epoch": 4.517241379310345, "grad_norm": 2.3875787258148193, "learning_rate": 5.408275862068966e-06, "loss": 0.8195, "step": 1310 }, { "epoch": 4.520689655172414, "grad_norm": 0.9850808382034302, "learning_rate": 5.412413793103448e-06, "loss": 0.6168, "step": 1311 }, { "epoch": 4.524137931034483, "grad_norm": 1.136198878288269, "learning_rate": 5.416551724137931e-06, "loss": 0.5453, "step": 1312 }, { "epoch": 4.527586206896552, "grad_norm": 0.9664598107337952, "learning_rate": 5.4206896551724134e-06, "loss": 0.5501, "step": 1313 }, { "epoch": 4.531034482758621, "grad_norm": 1.422327995300293, "learning_rate": 5.424827586206896e-06, "loss": 0.5121, "step": 1314 }, { "epoch": 4.5344827586206895, "grad_norm": 1.1369857788085938, "learning_rate": 5.428965517241379e-06, "loss": 0.5191, "step": 1315 }, { "epoch": 4.537931034482758, "grad_norm": 1.2506316900253296, "learning_rate": 5.433103448275862e-06, "loss": 0.5195, "step": 1316 }, { "epoch": 4.541379310344827, "grad_norm": 1.25725519657135, "learning_rate": 5.437241379310346e-06, "loss": 0.5681, "step": 1317 }, { "epoch": 4.544827586206896, "grad_norm": 1.1171637773513794, "learning_rate": 5.441379310344828e-06, "loss": 0.4989, "step": 1318 }, { "epoch": 4.548275862068966, "grad_norm": 1.5136185884475708, "learning_rate": 5.445517241379311e-06, "loss": 0.5233, "step": 1319 }, { "epoch": 4.551724137931035, "grad_norm": 1.1950174570083618, "learning_rate": 5.4496551724137935e-06, "loss": 0.527, "step": 1320 }, { "epoch": 4.555172413793104, "grad_norm": 1.2559572458267212, "learning_rate": 5.453793103448276e-06, "loss": 0.5417, "step": 1321 }, { "epoch": 4.558620689655172, "grad_norm": 1.4451158046722412, "learning_rate": 5.457931034482759e-06, "loss": 0.5838, "step": 1322 }, { "epoch": 4.562068965517241, "grad_norm": 1.3989592790603638, "learning_rate": 5.462068965517242e-06, "loss": 0.5138, "step": 1323 }, { "epoch": 4.56551724137931, "grad_norm": 1.4611625671386719, "learning_rate": 5.466206896551724e-06, "loss": 0.553, "step": 1324 }, { "epoch": 4.568965517241379, "grad_norm": 1.7551802396774292, "learning_rate": 5.470344827586207e-06, "loss": 0.5836, "step": 1325 }, { "epoch": 4.572413793103449, "grad_norm": 1.1762012243270874, "learning_rate": 5.47448275862069e-06, "loss": 0.5495, "step": 1326 }, { "epoch": 4.575862068965518, "grad_norm": 2.5180766582489014, "learning_rate": 5.478620689655173e-06, "loss": 0.5673, "step": 1327 }, { "epoch": 4.5793103448275865, "grad_norm": 1.3752328157424927, "learning_rate": 5.4827586206896556e-06, "loss": 0.4929, "step": 1328 }, { "epoch": 4.582758620689655, "grad_norm": 3.8316433429718018, "learning_rate": 5.4868965517241384e-06, "loss": 0.5835, "step": 1329 }, { "epoch": 4.586206896551724, "grad_norm": 1.5059058666229248, "learning_rate": 5.491034482758621e-06, "loss": 0.5778, "step": 1330 }, { "epoch": 4.589655172413793, "grad_norm": 1.6501357555389404, "learning_rate": 5.495172413793103e-06, "loss": 0.5989, "step": 1331 }, { "epoch": 4.593103448275862, "grad_norm": 2.230517864227295, "learning_rate": 5.499310344827586e-06, "loss": 0.5411, "step": 1332 }, { "epoch": 4.596551724137931, "grad_norm": 2.0733985900878906, "learning_rate": 5.503448275862069e-06, "loss": 0.6073, "step": 1333 }, { "epoch": 4.6, "grad_norm": 1.652668833732605, "learning_rate": 5.507586206896552e-06, "loss": 0.5901, "step": 1334 }, { "epoch": 4.603448275862069, "grad_norm": 2.645883321762085, "learning_rate": 5.511724137931035e-06, "loss": 0.7825, "step": 1335 }, { "epoch": 4.606896551724138, "grad_norm": 1.8277097940444946, "learning_rate": 5.515862068965518e-06, "loss": 0.6045, "step": 1336 }, { "epoch": 4.610344827586207, "grad_norm": 1.141448736190796, "learning_rate": 5.52e-06, "loss": 0.529, "step": 1337 }, { "epoch": 4.613793103448276, "grad_norm": 1.1200393438339233, "learning_rate": 5.5241379310344825e-06, "loss": 0.5156, "step": 1338 }, { "epoch": 4.617241379310345, "grad_norm": 1.03079354763031, "learning_rate": 5.528275862068965e-06, "loss": 0.4941, "step": 1339 }, { "epoch": 4.620689655172414, "grad_norm": 1.594424843788147, "learning_rate": 5.532413793103448e-06, "loss": 0.5586, "step": 1340 }, { "epoch": 4.624137931034483, "grad_norm": 1.2771435976028442, "learning_rate": 5.536551724137931e-06, "loss": 0.5465, "step": 1341 }, { "epoch": 4.627586206896551, "grad_norm": 1.7970576286315918, "learning_rate": 5.540689655172414e-06, "loss": 0.5064, "step": 1342 }, { "epoch": 4.63103448275862, "grad_norm": 1.117545485496521, "learning_rate": 5.544827586206897e-06, "loss": 0.4973, "step": 1343 }, { "epoch": 4.63448275862069, "grad_norm": 1.0812605619430542, "learning_rate": 5.548965517241379e-06, "loss": 0.4901, "step": 1344 }, { "epoch": 4.637931034482759, "grad_norm": 1.2799067497253418, "learning_rate": 5.553103448275862e-06, "loss": 0.535, "step": 1345 }, { "epoch": 4.641379310344828, "grad_norm": 1.553443193435669, "learning_rate": 5.557241379310345e-06, "loss": 0.5247, "step": 1346 }, { "epoch": 4.644827586206897, "grad_norm": 1.871133804321289, "learning_rate": 5.5613793103448275e-06, "loss": 0.5127, "step": 1347 }, { "epoch": 4.6482758620689655, "grad_norm": 1.1553887128829956, "learning_rate": 5.56551724137931e-06, "loss": 0.5291, "step": 1348 }, { "epoch": 4.651724137931034, "grad_norm": 1.2458957433700562, "learning_rate": 5.569655172413793e-06, "loss": 0.508, "step": 1349 }, { "epoch": 4.655172413793103, "grad_norm": 1.4087532758712769, "learning_rate": 5.573793103448276e-06, "loss": 0.5304, "step": 1350 }, { "epoch": 4.658620689655172, "grad_norm": 2.2202773094177246, "learning_rate": 5.577931034482759e-06, "loss": 0.5329, "step": 1351 }, { "epoch": 4.662068965517241, "grad_norm": 3.7882633209228516, "learning_rate": 5.582068965517242e-06, "loss": 0.5355, "step": 1352 }, { "epoch": 4.665517241379311, "grad_norm": 1.666274905204773, "learning_rate": 5.586206896551725e-06, "loss": 0.5168, "step": 1353 }, { "epoch": 4.6689655172413795, "grad_norm": 2.341519355773926, "learning_rate": 5.5903448275862075e-06, "loss": 0.5429, "step": 1354 }, { "epoch": 4.672413793103448, "grad_norm": 1.509455919265747, "learning_rate": 5.59448275862069e-06, "loss": 0.523, "step": 1355 }, { "epoch": 4.675862068965517, "grad_norm": 2.2580389976501465, "learning_rate": 5.598620689655172e-06, "loss": 0.5883, "step": 1356 }, { "epoch": 4.679310344827586, "grad_norm": 1.9380614757537842, "learning_rate": 5.602758620689655e-06, "loss": 0.5467, "step": 1357 }, { "epoch": 4.682758620689655, "grad_norm": 2.488285541534424, "learning_rate": 5.606896551724138e-06, "loss": 0.5939, "step": 1358 }, { "epoch": 4.686206896551724, "grad_norm": 1.8619499206542969, "learning_rate": 5.611034482758621e-06, "loss": 0.6248, "step": 1359 }, { "epoch": 4.689655172413794, "grad_norm": 2.924204111099243, "learning_rate": 5.615172413793104e-06, "loss": 0.8017, "step": 1360 }, { "epoch": 4.6931034482758625, "grad_norm": 1.2641090154647827, "learning_rate": 5.619310344827587e-06, "loss": 0.5865, "step": 1361 }, { "epoch": 4.696551724137931, "grad_norm": 1.0834177732467651, "learning_rate": 5.62344827586207e-06, "loss": 0.4972, "step": 1362 }, { "epoch": 4.7, "grad_norm": 1.204727053642273, "learning_rate": 5.627586206896552e-06, "loss": 0.492, "step": 1363 }, { "epoch": 4.703448275862069, "grad_norm": 1.308902382850647, "learning_rate": 5.6317241379310345e-06, "loss": 0.5438, "step": 1364 }, { "epoch": 4.706896551724138, "grad_norm": 0.9877138137817383, "learning_rate": 5.635862068965517e-06, "loss": 0.5167, "step": 1365 }, { "epoch": 4.710344827586207, "grad_norm": 1.2572021484375, "learning_rate": 5.64e-06, "loss": 0.483, "step": 1366 }, { "epoch": 4.713793103448276, "grad_norm": 1.0616899728775024, "learning_rate": 5.644137931034483e-06, "loss": 0.4926, "step": 1367 }, { "epoch": 4.7172413793103445, "grad_norm": 1.7163647413253784, "learning_rate": 5.648275862068966e-06, "loss": 0.49, "step": 1368 }, { "epoch": 4.720689655172414, "grad_norm": 1.4963575601577759, "learning_rate": 5.652413793103448e-06, "loss": 0.527, "step": 1369 }, { "epoch": 4.724137931034483, "grad_norm": 1.3868992328643799, "learning_rate": 5.656551724137931e-06, "loss": 0.5288, "step": 1370 }, { "epoch": 4.727586206896552, "grad_norm": 1.196429967880249, "learning_rate": 5.660689655172414e-06, "loss": 0.5085, "step": 1371 }, { "epoch": 4.731034482758621, "grad_norm": 1.3196688890457153, "learning_rate": 5.6648275862068966e-06, "loss": 0.5064, "step": 1372 }, { "epoch": 4.73448275862069, "grad_norm": 1.21786630153656, "learning_rate": 5.668965517241379e-06, "loss": 0.4948, "step": 1373 }, { "epoch": 4.7379310344827585, "grad_norm": 1.2351001501083374, "learning_rate": 5.673103448275862e-06, "loss": 0.5439, "step": 1374 }, { "epoch": 4.741379310344827, "grad_norm": 1.2538071870803833, "learning_rate": 5.677241379310345e-06, "loss": 0.5177, "step": 1375 }, { "epoch": 4.744827586206896, "grad_norm": 1.7726207971572876, "learning_rate": 5.681379310344827e-06, "loss": 0.5711, "step": 1376 }, { "epoch": 4.748275862068965, "grad_norm": 1.1906723976135254, "learning_rate": 5.68551724137931e-06, "loss": 0.5103, "step": 1377 }, { "epoch": 4.751724137931035, "grad_norm": 1.701032280921936, "learning_rate": 5.689655172413793e-06, "loss": 0.5271, "step": 1378 }, { "epoch": 4.755172413793104, "grad_norm": 1.874239206314087, "learning_rate": 5.693793103448276e-06, "loss": 0.5274, "step": 1379 }, { "epoch": 4.758620689655173, "grad_norm": 1.6729116439819336, "learning_rate": 5.697931034482759e-06, "loss": 0.5369, "step": 1380 }, { "epoch": 4.7620689655172415, "grad_norm": 1.8794020414352417, "learning_rate": 5.7020689655172415e-06, "loss": 0.561, "step": 1381 }, { "epoch": 4.76551724137931, "grad_norm": 1.4742891788482666, "learning_rate": 5.7062068965517235e-06, "loss": 0.5297, "step": 1382 }, { "epoch": 4.768965517241379, "grad_norm": 1.8301664590835571, "learning_rate": 5.710344827586206e-06, "loss": 0.5462, "step": 1383 }, { "epoch": 4.772413793103448, "grad_norm": 3.885788917541504, "learning_rate": 5.71448275862069e-06, "loss": 0.6689, "step": 1384 }, { "epoch": 4.775862068965517, "grad_norm": 2.3869388103485107, "learning_rate": 5.718620689655173e-06, "loss": 0.7979, "step": 1385 }, { "epoch": 4.779310344827586, "grad_norm": 1.1915236711502075, "learning_rate": 5.722758620689656e-06, "loss": 0.5864, "step": 1386 }, { "epoch": 4.7827586206896555, "grad_norm": 0.9243395328521729, "learning_rate": 5.726896551724139e-06, "loss": 0.5204, "step": 1387 }, { "epoch": 4.786206896551724, "grad_norm": 1.153385043144226, "learning_rate": 5.7310344827586215e-06, "loss": 0.5254, "step": 1388 }, { "epoch": 4.789655172413793, "grad_norm": 1.7257778644561768, "learning_rate": 5.7351724137931036e-06, "loss": 0.485, "step": 1389 }, { "epoch": 4.793103448275862, "grad_norm": 1.3941590785980225, "learning_rate": 5.7393103448275864e-06, "loss": 0.4916, "step": 1390 }, { "epoch": 4.796551724137931, "grad_norm": 1.0181026458740234, "learning_rate": 5.743448275862069e-06, "loss": 0.4993, "step": 1391 }, { "epoch": 4.8, "grad_norm": 1.65494704246521, "learning_rate": 5.747586206896552e-06, "loss": 0.5095, "step": 1392 }, { "epoch": 4.803448275862069, "grad_norm": 1.5053163766860962, "learning_rate": 5.751724137931035e-06, "loss": 0.5135, "step": 1393 }, { "epoch": 4.8068965517241375, "grad_norm": 1.4853988885879517, "learning_rate": 5.755862068965518e-06, "loss": 0.5053, "step": 1394 }, { "epoch": 4.810344827586206, "grad_norm": 1.4294706583023071, "learning_rate": 5.76e-06, "loss": 0.499, "step": 1395 }, { "epoch": 4.813793103448276, "grad_norm": 3.782879114151001, "learning_rate": 5.764137931034483e-06, "loss": 0.4846, "step": 1396 }, { "epoch": 4.817241379310345, "grad_norm": 1.4057222604751587, "learning_rate": 5.768275862068966e-06, "loss": 0.5449, "step": 1397 }, { "epoch": 4.820689655172414, "grad_norm": 1.4759464263916016, "learning_rate": 5.7724137931034485e-06, "loss": 0.5104, "step": 1398 }, { "epoch": 4.824137931034483, "grad_norm": 1.8219412565231323, "learning_rate": 5.776551724137931e-06, "loss": 0.5091, "step": 1399 }, { "epoch": 4.827586206896552, "grad_norm": 1.3981959819793701, "learning_rate": 5.780689655172414e-06, "loss": 0.5129, "step": 1400 }, { "epoch": 4.8310344827586205, "grad_norm": 5.214570045471191, "learning_rate": 5.784827586206896e-06, "loss": 0.543, "step": 1401 }, { "epoch": 4.834482758620689, "grad_norm": 3.9891321659088135, "learning_rate": 5.788965517241379e-06, "loss": 0.5283, "step": 1402 }, { "epoch": 4.837931034482759, "grad_norm": 1.447596549987793, "learning_rate": 5.793103448275862e-06, "loss": 0.5424, "step": 1403 }, { "epoch": 4.841379310344828, "grad_norm": 1.8949192762374878, "learning_rate": 5.797241379310345e-06, "loss": 0.5731, "step": 1404 }, { "epoch": 4.844827586206897, "grad_norm": 2.5810155868530273, "learning_rate": 5.801379310344828e-06, "loss": 0.5893, "step": 1405 }, { "epoch": 4.848275862068966, "grad_norm": 2.169055461883545, "learning_rate": 5.805517241379311e-06, "loss": 0.525, "step": 1406 }, { "epoch": 4.8517241379310345, "grad_norm": 2.178529977798462, "learning_rate": 5.8096551724137934e-06, "loss": 0.5708, "step": 1407 }, { "epoch": 4.855172413793103, "grad_norm": 4.014997959136963, "learning_rate": 5.8137931034482755e-06, "loss": 0.5761, "step": 1408 }, { "epoch": 4.858620689655172, "grad_norm": 4.364734172821045, "learning_rate": 5.817931034482758e-06, "loss": 0.5833, "step": 1409 }, { "epoch": 4.862068965517241, "grad_norm": 2.0657718181610107, "learning_rate": 5.822068965517241e-06, "loss": 0.7831, "step": 1410 }, { "epoch": 4.86551724137931, "grad_norm": 2.6200196743011475, "learning_rate": 5.826206896551724e-06, "loss": 0.5808, "step": 1411 }, { "epoch": 4.86896551724138, "grad_norm": 1.0740351676940918, "learning_rate": 5.830344827586207e-06, "loss": 0.5111, "step": 1412 }, { "epoch": 4.872413793103449, "grad_norm": 1.0740233659744263, "learning_rate": 5.83448275862069e-06, "loss": 0.5043, "step": 1413 }, { "epoch": 4.875862068965517, "grad_norm": 1.0257552862167358, "learning_rate": 5.838620689655172e-06, "loss": 0.5661, "step": 1414 }, { "epoch": 4.879310344827586, "grad_norm": 1.0683393478393555, "learning_rate": 5.842758620689655e-06, "loss": 0.4898, "step": 1415 }, { "epoch": 4.882758620689655, "grad_norm": 1.318769931793213, "learning_rate": 5.8468965517241375e-06, "loss": 0.5119, "step": 1416 }, { "epoch": 4.886206896551724, "grad_norm": 1.154025673866272, "learning_rate": 5.851034482758621e-06, "loss": 0.5112, "step": 1417 }, { "epoch": 4.889655172413793, "grad_norm": 1.2420552968978882, "learning_rate": 5.855172413793104e-06, "loss": 0.5026, "step": 1418 }, { "epoch": 4.893103448275862, "grad_norm": 1.1259058713912964, "learning_rate": 5.859310344827587e-06, "loss": 0.466, "step": 1419 }, { "epoch": 4.896551724137931, "grad_norm": 1.307750940322876, "learning_rate": 5.86344827586207e-06, "loss": 0.5289, "step": 1420 }, { "epoch": 4.9, "grad_norm": 1.4108456373214722, "learning_rate": 5.867586206896552e-06, "loss": 0.509, "step": 1421 }, { "epoch": 4.903448275862069, "grad_norm": 1.2704194784164429, "learning_rate": 5.871724137931035e-06, "loss": 0.4945, "step": 1422 }, { "epoch": 4.906896551724138, "grad_norm": 1.1467936038970947, "learning_rate": 5.875862068965518e-06, "loss": 0.4855, "step": 1423 }, { "epoch": 4.910344827586207, "grad_norm": 1.3175846338272095, "learning_rate": 5.8800000000000005e-06, "loss": 0.5235, "step": 1424 }, { "epoch": 4.913793103448276, "grad_norm": 1.3416576385498047, "learning_rate": 5.884137931034483e-06, "loss": 0.5425, "step": 1425 }, { "epoch": 4.917241379310345, "grad_norm": 1.9785223007202148, "learning_rate": 5.888275862068966e-06, "loss": 0.4941, "step": 1426 }, { "epoch": 4.9206896551724135, "grad_norm": 1.1876784563064575, "learning_rate": 5.892413793103448e-06, "loss": 0.5329, "step": 1427 }, { "epoch": 4.924137931034482, "grad_norm": 1.6652774810791016, "learning_rate": 5.896551724137931e-06, "loss": 0.5122, "step": 1428 }, { "epoch": 4.927586206896551, "grad_norm": 1.5394973754882812, "learning_rate": 5.900689655172414e-06, "loss": 0.5528, "step": 1429 }, { "epoch": 4.931034482758621, "grad_norm": 1.4834027290344238, "learning_rate": 5.904827586206897e-06, "loss": 0.5189, "step": 1430 }, { "epoch": 4.93448275862069, "grad_norm": 2.2296125888824463, "learning_rate": 5.90896551724138e-06, "loss": 0.5771, "step": 1431 }, { "epoch": 4.937931034482759, "grad_norm": 1.6438491344451904, "learning_rate": 5.9131034482758625e-06, "loss": 0.5849, "step": 1432 }, { "epoch": 4.941379310344828, "grad_norm": 1.8630563020706177, "learning_rate": 5.917241379310345e-06, "loss": 0.6152, "step": 1433 }, { "epoch": 4.944827586206896, "grad_norm": 2.962858200073242, "learning_rate": 5.921379310344827e-06, "loss": 0.6582, "step": 1434 }, { "epoch": 4.948275862068965, "grad_norm": 3.275165557861328, "learning_rate": 5.92551724137931e-06, "loss": 0.8772, "step": 1435 }, { "epoch": 4.951724137931034, "grad_norm": 1.7890211343765259, "learning_rate": 5.929655172413793e-06, "loss": 0.4914, "step": 1436 }, { "epoch": 4.955172413793104, "grad_norm": 0.8650091290473938, "learning_rate": 5.933793103448276e-06, "loss": 0.4984, "step": 1437 }, { "epoch": 4.958620689655173, "grad_norm": 1.3396137952804565, "learning_rate": 5.937931034482759e-06, "loss": 0.5203, "step": 1438 }, { "epoch": 4.962068965517242, "grad_norm": 1.5991640090942383, "learning_rate": 5.942068965517242e-06, "loss": 0.5289, "step": 1439 }, { "epoch": 4.9655172413793105, "grad_norm": 1.3345272541046143, "learning_rate": 5.946206896551724e-06, "loss": 0.5067, "step": 1440 }, { "epoch": 4.968965517241379, "grad_norm": 1.136484146118164, "learning_rate": 5.950344827586207e-06, "loss": 0.5182, "step": 1441 }, { "epoch": 4.972413793103448, "grad_norm": 2.6568920612335205, "learning_rate": 5.9544827586206895e-06, "loss": 0.5031, "step": 1442 }, { "epoch": 4.975862068965517, "grad_norm": 1.745955467224121, "learning_rate": 5.958620689655172e-06, "loss": 0.5337, "step": 1443 }, { "epoch": 4.979310344827586, "grad_norm": 1.832842469215393, "learning_rate": 5.962758620689655e-06, "loss": 0.5784, "step": 1444 }, { "epoch": 4.982758620689655, "grad_norm": 1.209067940711975, "learning_rate": 5.966896551724138e-06, "loss": 0.522, "step": 1445 }, { "epoch": 4.9862068965517246, "grad_norm": 1.6614729166030884, "learning_rate": 5.97103448275862e-06, "loss": 0.5619, "step": 1446 }, { "epoch": 4.989655172413793, "grad_norm": 1.6847479343414307, "learning_rate": 5.975172413793103e-06, "loss": 0.5224, "step": 1447 }, { "epoch": 4.993103448275862, "grad_norm": 1.6686770915985107, "learning_rate": 5.979310344827586e-06, "loss": 0.5496, "step": 1448 }, { "epoch": 4.996551724137931, "grad_norm": 1.915513277053833, "learning_rate": 5.983448275862069e-06, "loss": 0.6009, "step": 1449 }, { "epoch": 5.0, "grad_norm": 2.2656967639923096, "learning_rate": 5.987586206896552e-06, "loss": 0.6942, "step": 1450 }, { "epoch": 5.003448275862069, "grad_norm": 3.0034565925598145, "learning_rate": 5.991724137931035e-06, "loss": 0.5334, "step": 1451 }, { "epoch": 5.006896551724138, "grad_norm": 1.7764610052108765, "learning_rate": 5.995862068965518e-06, "loss": 0.5663, "step": 1452 }, { "epoch": 5.010344827586207, "grad_norm": 3.5209882259368896, "learning_rate": 6e-06, "loss": 0.5163, "step": 1453 }, { "epoch": 5.0137931034482754, "grad_norm": 1.065741777420044, "learning_rate": 6.004137931034483e-06, "loss": 0.494, "step": 1454 }, { "epoch": 5.017241379310345, "grad_norm": 0.9970170855522156, "learning_rate": 6.008275862068966e-06, "loss": 0.5087, "step": 1455 }, { "epoch": 5.020689655172414, "grad_norm": 1.2110010385513306, "learning_rate": 6.012413793103449e-06, "loss": 0.486, "step": 1456 }, { "epoch": 5.024137931034483, "grad_norm": 1.1593170166015625, "learning_rate": 6.016551724137932e-06, "loss": 0.4664, "step": 1457 }, { "epoch": 5.027586206896552, "grad_norm": 1.1404155492782593, "learning_rate": 6.0206896551724145e-06, "loss": 0.5151, "step": 1458 }, { "epoch": 5.031034482758621, "grad_norm": 1.1128883361816406, "learning_rate": 6.0248275862068965e-06, "loss": 0.5009, "step": 1459 }, { "epoch": 5.0344827586206895, "grad_norm": 1.4908950328826904, "learning_rate": 6.028965517241379e-06, "loss": 0.5186, "step": 1460 }, { "epoch": 5.037931034482758, "grad_norm": 1.9945099353790283, "learning_rate": 6.033103448275862e-06, "loss": 0.4793, "step": 1461 }, { "epoch": 5.041379310344827, "grad_norm": 1.1819111108779907, "learning_rate": 6.037241379310345e-06, "loss": 0.5253, "step": 1462 }, { "epoch": 5.044827586206897, "grad_norm": 1.1296721696853638, "learning_rate": 6.041379310344828e-06, "loss": 0.4928, "step": 1463 }, { "epoch": 5.048275862068966, "grad_norm": 1.283504605293274, "learning_rate": 6.045517241379311e-06, "loss": 0.5127, "step": 1464 }, { "epoch": 5.051724137931035, "grad_norm": 1.2287187576293945, "learning_rate": 6.049655172413794e-06, "loss": 0.5239, "step": 1465 }, { "epoch": 5.055172413793104, "grad_norm": 1.976055383682251, "learning_rate": 6.053793103448276e-06, "loss": 0.4692, "step": 1466 }, { "epoch": 5.058620689655172, "grad_norm": 1.1845871210098267, "learning_rate": 6.0579310344827586e-06, "loss": 0.4991, "step": 1467 }, { "epoch": 5.062068965517241, "grad_norm": 1.5011857748031616, "learning_rate": 6.0620689655172414e-06, "loss": 0.5306, "step": 1468 }, { "epoch": 5.06551724137931, "grad_norm": 1.7220001220703125, "learning_rate": 6.066206896551724e-06, "loss": 0.5057, "step": 1469 }, { "epoch": 5.068965517241379, "grad_norm": 1.5177443027496338, "learning_rate": 6.070344827586207e-06, "loss": 0.5469, "step": 1470 }, { "epoch": 5.072413793103448, "grad_norm": 1.4502140283584595, "learning_rate": 6.07448275862069e-06, "loss": 0.5703, "step": 1471 }, { "epoch": 5.075862068965518, "grad_norm": 1.458350658416748, "learning_rate": 6.078620689655172e-06, "loss": 0.5197, "step": 1472 }, { "epoch": 5.0793103448275865, "grad_norm": 1.7169773578643799, "learning_rate": 6.082758620689655e-06, "loss": 0.5881, "step": 1473 }, { "epoch": 5.082758620689655, "grad_norm": 2.1966605186462402, "learning_rate": 6.086896551724138e-06, "loss": 0.6184, "step": 1474 }, { "epoch": 5.086206896551724, "grad_norm": 2.90733003616333, "learning_rate": 6.091034482758621e-06, "loss": 0.7807, "step": 1475 }, { "epoch": 5.089655172413793, "grad_norm": 1.3100042343139648, "learning_rate": 6.0951724137931035e-06, "loss": 0.612, "step": 1476 }, { "epoch": 5.093103448275862, "grad_norm": 1.104200005531311, "learning_rate": 6.099310344827586e-06, "loss": 0.4927, "step": 1477 }, { "epoch": 5.096551724137931, "grad_norm": 1.4883126020431519, "learning_rate": 6.103448275862069e-06, "loss": 0.4904, "step": 1478 }, { "epoch": 5.1, "grad_norm": 1.2458969354629517, "learning_rate": 6.107586206896551e-06, "loss": 0.4716, "step": 1479 }, { "epoch": 5.103448275862069, "grad_norm": 1.180527925491333, "learning_rate": 6.111724137931034e-06, "loss": 0.4874, "step": 1480 }, { "epoch": 5.106896551724138, "grad_norm": 1.2270616292953491, "learning_rate": 6.115862068965517e-06, "loss": 0.4782, "step": 1481 }, { "epoch": 5.110344827586207, "grad_norm": 1.061629056930542, "learning_rate": 6.12e-06, "loss": 0.4899, "step": 1482 }, { "epoch": 5.113793103448276, "grad_norm": 1.2521734237670898, "learning_rate": 6.1241379310344836e-06, "loss": 0.4539, "step": 1483 }, { "epoch": 5.117241379310345, "grad_norm": 1.4350357055664062, "learning_rate": 6.1282758620689664e-06, "loss": 0.488, "step": 1484 }, { "epoch": 5.120689655172414, "grad_norm": 1.181428074836731, "learning_rate": 6.1324137931034484e-06, "loss": 0.4758, "step": 1485 }, { "epoch": 5.124137931034483, "grad_norm": 1.4566370248794556, "learning_rate": 6.136551724137931e-06, "loss": 0.5187, "step": 1486 }, { "epoch": 5.127586206896551, "grad_norm": 2.0010669231414795, "learning_rate": 6.140689655172414e-06, "loss": 0.542, "step": 1487 }, { "epoch": 5.13103448275862, "grad_norm": 1.2479556798934937, "learning_rate": 6.144827586206897e-06, "loss": 0.4835, "step": 1488 }, { "epoch": 5.13448275862069, "grad_norm": 1.2887401580810547, "learning_rate": 6.14896551724138e-06, "loss": 0.4805, "step": 1489 }, { "epoch": 5.137931034482759, "grad_norm": 1.3122618198394775, "learning_rate": 6.153103448275863e-06, "loss": 0.4925, "step": 1490 }, { "epoch": 5.141379310344828, "grad_norm": 1.6836148500442505, "learning_rate": 6.157241379310345e-06, "loss": 0.5091, "step": 1491 }, { "epoch": 5.144827586206897, "grad_norm": 1.5120869874954224, "learning_rate": 6.161379310344828e-06, "loss": 0.486, "step": 1492 }, { "epoch": 5.1482758620689655, "grad_norm": 1.5131685733795166, "learning_rate": 6.1655172413793105e-06, "loss": 0.5221, "step": 1493 }, { "epoch": 5.151724137931034, "grad_norm": 1.6982169151306152, "learning_rate": 6.169655172413793e-06, "loss": 0.5216, "step": 1494 }, { "epoch": 5.155172413793103, "grad_norm": 1.4531848430633545, "learning_rate": 6.173793103448276e-06, "loss": 0.5649, "step": 1495 }, { "epoch": 5.158620689655172, "grad_norm": 2.135694742202759, "learning_rate": 6.177931034482759e-06, "loss": 0.5226, "step": 1496 }, { "epoch": 5.162068965517241, "grad_norm": 1.582730770111084, "learning_rate": 6.182068965517242e-06, "loss": 0.5288, "step": 1497 }, { "epoch": 5.165517241379311, "grad_norm": 1.9454001188278198, "learning_rate": 6.186206896551724e-06, "loss": 0.5413, "step": 1498 }, { "epoch": 5.1689655172413795, "grad_norm": 1.890738844871521, "learning_rate": 6.190344827586207e-06, "loss": 0.5769, "step": 1499 }, { "epoch": 5.172413793103448, "grad_norm": 2.39844012260437, "learning_rate": 6.19448275862069e-06, "loss": 0.7768, "step": 1500 }, { "epoch": 5.175862068965517, "grad_norm": 1.6439449787139893, "learning_rate": 6.198620689655173e-06, "loss": 0.527, "step": 1501 }, { "epoch": 5.179310344827586, "grad_norm": 0.9621250629425049, "learning_rate": 6.2027586206896555e-06, "loss": 0.4851, "step": 1502 }, { "epoch": 5.182758620689655, "grad_norm": 2.6463215351104736, "learning_rate": 6.206896551724138e-06, "loss": 0.4799, "step": 1503 }, { "epoch": 5.186206896551724, "grad_norm": 1.0414445400238037, "learning_rate": 6.21103448275862e-06, "loss": 0.4892, "step": 1504 }, { "epoch": 5.189655172413793, "grad_norm": 0.9244794249534607, "learning_rate": 6.215172413793103e-06, "loss": 0.4411, "step": 1505 }, { "epoch": 5.1931034482758625, "grad_norm": 1.7102617025375366, "learning_rate": 6.219310344827586e-06, "loss": 0.4713, "step": 1506 }, { "epoch": 5.196551724137931, "grad_norm": 1.2694947719573975, "learning_rate": 6.223448275862069e-06, "loss": 0.4993, "step": 1507 }, { "epoch": 5.2, "grad_norm": 2.024860382080078, "learning_rate": 6.227586206896552e-06, "loss": 0.5011, "step": 1508 }, { "epoch": 5.203448275862069, "grad_norm": 0.9389639496803284, "learning_rate": 6.231724137931035e-06, "loss": 0.4764, "step": 1509 }, { "epoch": 5.206896551724138, "grad_norm": 1.1428859233856201, "learning_rate": 6.2358620689655175e-06, "loss": 0.4905, "step": 1510 }, { "epoch": 5.210344827586207, "grad_norm": 1.9470065832138062, "learning_rate": 6.2399999999999995e-06, "loss": 0.4804, "step": 1511 }, { "epoch": 5.213793103448276, "grad_norm": 10.817809104919434, "learning_rate": 6.244137931034482e-06, "loss": 0.5008, "step": 1512 }, { "epoch": 5.2172413793103445, "grad_norm": 1.5081735849380493, "learning_rate": 6.248275862068965e-06, "loss": 0.4728, "step": 1513 }, { "epoch": 5.220689655172414, "grad_norm": 1.7760807275772095, "learning_rate": 6.252413793103448e-06, "loss": 0.4914, "step": 1514 }, { "epoch": 5.224137931034483, "grad_norm": 1.2988307476043701, "learning_rate": 6.256551724137931e-06, "loss": 0.5153, "step": 1515 }, { "epoch": 5.227586206896552, "grad_norm": 1.2028311491012573, "learning_rate": 6.260689655172414e-06, "loss": 0.4545, "step": 1516 }, { "epoch": 5.231034482758621, "grad_norm": 1.5636963844299316, "learning_rate": 6.264827586206897e-06, "loss": 0.5035, "step": 1517 }, { "epoch": 5.23448275862069, "grad_norm": 1.1805609464645386, "learning_rate": 6.26896551724138e-06, "loss": 0.4956, "step": 1518 }, { "epoch": 5.2379310344827585, "grad_norm": 1.6795072555541992, "learning_rate": 6.2731034482758625e-06, "loss": 0.5117, "step": 1519 }, { "epoch": 5.241379310344827, "grad_norm": 1.497580885887146, "learning_rate": 6.277241379310345e-06, "loss": 0.4681, "step": 1520 }, { "epoch": 5.244827586206896, "grad_norm": 1.6547729969024658, "learning_rate": 6.281379310344828e-06, "loss": 0.526, "step": 1521 }, { "epoch": 5.248275862068965, "grad_norm": 3.0807743072509766, "learning_rate": 6.285517241379311e-06, "loss": 0.5576, "step": 1522 }, { "epoch": 5.251724137931035, "grad_norm": 2.909329652786255, "learning_rate": 6.289655172413794e-06, "loss": 0.5655, "step": 1523 }, { "epoch": 5.255172413793104, "grad_norm": 2.6853981018066406, "learning_rate": 6.293793103448276e-06, "loss": 0.6307, "step": 1524 }, { "epoch": 5.258620689655173, "grad_norm": 3.077099323272705, "learning_rate": 6.297931034482759e-06, "loss": 0.7288, "step": 1525 }, { "epoch": 5.2620689655172415, "grad_norm": 0.9062200784683228, "learning_rate": 6.302068965517242e-06, "loss": 0.4949, "step": 1526 }, { "epoch": 5.26551724137931, "grad_norm": 1.5532556772232056, "learning_rate": 6.3062068965517245e-06, "loss": 0.4947, "step": 1527 }, { "epoch": 5.268965517241379, "grad_norm": 1.0003327131271362, "learning_rate": 6.310344827586207e-06, "loss": 0.5027, "step": 1528 }, { "epoch": 5.272413793103448, "grad_norm": 2.7187092304229736, "learning_rate": 6.31448275862069e-06, "loss": 0.4671, "step": 1529 }, { "epoch": 5.275862068965517, "grad_norm": 1.0593907833099365, "learning_rate": 6.318620689655172e-06, "loss": 0.4742, "step": 1530 }, { "epoch": 5.279310344827586, "grad_norm": 1.2486660480499268, "learning_rate": 6.322758620689655e-06, "loss": 0.4381, "step": 1531 }, { "epoch": 5.2827586206896555, "grad_norm": 1.0560729503631592, "learning_rate": 6.326896551724138e-06, "loss": 0.4792, "step": 1532 }, { "epoch": 5.286206896551724, "grad_norm": 2.040226936340332, "learning_rate": 6.331034482758621e-06, "loss": 0.4952, "step": 1533 }, { "epoch": 5.289655172413793, "grad_norm": 1.7190133333206177, "learning_rate": 6.335172413793104e-06, "loss": 0.4778, "step": 1534 }, { "epoch": 5.293103448275862, "grad_norm": 1.052417516708374, "learning_rate": 6.339310344827587e-06, "loss": 0.4687, "step": 1535 }, { "epoch": 5.296551724137931, "grad_norm": 1.1649876832962036, "learning_rate": 6.343448275862069e-06, "loss": 0.4617, "step": 1536 }, { "epoch": 5.3, "grad_norm": 1.0449192523956299, "learning_rate": 6.3475862068965515e-06, "loss": 0.4712, "step": 1537 }, { "epoch": 5.303448275862069, "grad_norm": 1.273069143295288, "learning_rate": 6.351724137931034e-06, "loss": 0.448, "step": 1538 }, { "epoch": 5.3068965517241375, "grad_norm": 1.1827744245529175, "learning_rate": 6.355862068965517e-06, "loss": 0.4765, "step": 1539 }, { "epoch": 5.310344827586207, "grad_norm": 1.7157291173934937, "learning_rate": 6.36e-06, "loss": 0.5114, "step": 1540 }, { "epoch": 5.313793103448276, "grad_norm": 1.4130038022994995, "learning_rate": 6.364137931034483e-06, "loss": 0.4684, "step": 1541 }, { "epoch": 5.317241379310345, "grad_norm": 1.2021533250808716, "learning_rate": 6.368275862068966e-06, "loss": 0.4556, "step": 1542 }, { "epoch": 5.320689655172414, "grad_norm": 2.0644240379333496, "learning_rate": 6.372413793103448e-06, "loss": 0.5193, "step": 1543 }, { "epoch": 5.324137931034483, "grad_norm": 1.7521440982818604, "learning_rate": 6.376551724137931e-06, "loss": 0.5019, "step": 1544 }, { "epoch": 5.327586206896552, "grad_norm": 1.296218752861023, "learning_rate": 6.3806896551724136e-06, "loss": 0.5044, "step": 1545 }, { "epoch": 5.3310344827586205, "grad_norm": 1.4953727722167969, "learning_rate": 6.3848275862068964e-06, "loss": 0.5035, "step": 1546 }, { "epoch": 5.334482758620689, "grad_norm": 1.5847978591918945, "learning_rate": 6.388965517241379e-06, "loss": 0.563, "step": 1547 }, { "epoch": 5.337931034482759, "grad_norm": 1.8577325344085693, "learning_rate": 6.393103448275862e-06, "loss": 0.5524, "step": 1548 }, { "epoch": 5.341379310344828, "grad_norm": 2.8688547611236572, "learning_rate": 6.397241379310344e-06, "loss": 0.5907, "step": 1549 }, { "epoch": 5.344827586206897, "grad_norm": 2.4486567974090576, "learning_rate": 6.401379310344828e-06, "loss": 0.7382, "step": 1550 }, { "epoch": 5.348275862068966, "grad_norm": 1.3764488697052002, "learning_rate": 6.405517241379311e-06, "loss": 0.5521, "step": 1551 }, { "epoch": 5.3517241379310345, "grad_norm": 1.0547049045562744, "learning_rate": 6.409655172413794e-06, "loss": 0.4608, "step": 1552 }, { "epoch": 5.355172413793103, "grad_norm": 1.1494797468185425, "learning_rate": 6.4137931034482765e-06, "loss": 0.5285, "step": 1553 }, { "epoch": 5.358620689655172, "grad_norm": 1.1384005546569824, "learning_rate": 6.417931034482759e-06, "loss": 0.4964, "step": 1554 }, { "epoch": 5.362068965517241, "grad_norm": 1.2566345930099487, "learning_rate": 6.422068965517242e-06, "loss": 0.4788, "step": 1555 }, { "epoch": 5.36551724137931, "grad_norm": 1.1201163530349731, "learning_rate": 6.426206896551724e-06, "loss": 0.4973, "step": 1556 }, { "epoch": 5.36896551724138, "grad_norm": 1.4310078620910645, "learning_rate": 6.430344827586207e-06, "loss": 0.4551, "step": 1557 }, { "epoch": 5.372413793103449, "grad_norm": 1.6433274745941162, "learning_rate": 6.43448275862069e-06, "loss": 0.5231, "step": 1558 }, { "epoch": 5.375862068965517, "grad_norm": 1.2432795763015747, "learning_rate": 6.438620689655173e-06, "loss": 0.4704, "step": 1559 }, { "epoch": 5.379310344827586, "grad_norm": 3.1629817485809326, "learning_rate": 6.442758620689656e-06, "loss": 0.4447, "step": 1560 }, { "epoch": 5.382758620689655, "grad_norm": 1.5377146005630493, "learning_rate": 6.4468965517241386e-06, "loss": 0.5499, "step": 1561 }, { "epoch": 5.386206896551724, "grad_norm": 1.701149582862854, "learning_rate": 6.451034482758621e-06, "loss": 0.5064, "step": 1562 }, { "epoch": 5.389655172413793, "grad_norm": 5.240401744842529, "learning_rate": 6.4551724137931034e-06, "loss": 0.49, "step": 1563 }, { "epoch": 5.393103448275862, "grad_norm": 1.5112409591674805, "learning_rate": 6.459310344827586e-06, "loss": 0.4493, "step": 1564 }, { "epoch": 5.396551724137931, "grad_norm": 1.2978135347366333, "learning_rate": 6.463448275862069e-06, "loss": 0.5019, "step": 1565 }, { "epoch": 5.4, "grad_norm": 1.4084522724151611, "learning_rate": 6.467586206896552e-06, "loss": 0.4921, "step": 1566 }, { "epoch": 5.403448275862069, "grad_norm": 1.3973407745361328, "learning_rate": 6.471724137931035e-06, "loss": 0.5023, "step": 1567 }, { "epoch": 5.406896551724138, "grad_norm": 1.336641550064087, "learning_rate": 6.475862068965518e-06, "loss": 0.5012, "step": 1568 }, { "epoch": 5.410344827586207, "grad_norm": 1.5885756015777588, "learning_rate": 6.48e-06, "loss": 0.5432, "step": 1569 }, { "epoch": 5.413793103448276, "grad_norm": 1.469720721244812, "learning_rate": 6.484137931034483e-06, "loss": 0.5483, "step": 1570 }, { "epoch": 5.417241379310345, "grad_norm": 1.8389045000076294, "learning_rate": 6.4882758620689655e-06, "loss": 0.527, "step": 1571 }, { "epoch": 5.4206896551724135, "grad_norm": 1.719602108001709, "learning_rate": 6.492413793103448e-06, "loss": 0.5015, "step": 1572 }, { "epoch": 5.424137931034482, "grad_norm": 1.68593430519104, "learning_rate": 6.496551724137931e-06, "loss": 0.5274, "step": 1573 }, { "epoch": 5.427586206896552, "grad_norm": 3.636967897415161, "learning_rate": 6.500689655172414e-06, "loss": 0.604, "step": 1574 }, { "epoch": 5.431034482758621, "grad_norm": 3.761242389678955, "learning_rate": 6.504827586206896e-06, "loss": 0.7321, "step": 1575 }, { "epoch": 5.43448275862069, "grad_norm": 1.65010404586792, "learning_rate": 6.508965517241379e-06, "loss": 0.5113, "step": 1576 }, { "epoch": 5.437931034482759, "grad_norm": 1.1668214797973633, "learning_rate": 6.513103448275862e-06, "loss": 0.4911, "step": 1577 }, { "epoch": 5.441379310344828, "grad_norm": 0.94740229845047, "learning_rate": 6.517241379310345e-06, "loss": 0.4286, "step": 1578 }, { "epoch": 5.444827586206896, "grad_norm": 1.8950741291046143, "learning_rate": 6.521379310344828e-06, "loss": 0.4675, "step": 1579 }, { "epoch": 5.448275862068965, "grad_norm": 1.2318079471588135, "learning_rate": 6.5255172413793105e-06, "loss": 0.4719, "step": 1580 }, { "epoch": 5.451724137931034, "grad_norm": 0.9501941800117493, "learning_rate": 6.5296551724137925e-06, "loss": 0.4734, "step": 1581 }, { "epoch": 5.455172413793104, "grad_norm": 1.0797241926193237, "learning_rate": 6.533793103448275e-06, "loss": 0.492, "step": 1582 }, { "epoch": 5.458620689655173, "grad_norm": 1.1396129131317139, "learning_rate": 6.537931034482759e-06, "loss": 0.4694, "step": 1583 }, { "epoch": 5.462068965517242, "grad_norm": 0.9921515583992004, "learning_rate": 6.542068965517242e-06, "loss": 0.4312, "step": 1584 }, { "epoch": 5.4655172413793105, "grad_norm": 1.3499151468276978, "learning_rate": 6.546206896551725e-06, "loss": 0.4648, "step": 1585 }, { "epoch": 5.468965517241379, "grad_norm": 1.56581711769104, "learning_rate": 6.550344827586208e-06, "loss": 0.5018, "step": 1586 }, { "epoch": 5.472413793103448, "grad_norm": 1.2540159225463867, "learning_rate": 6.5544827586206905e-06, "loss": 0.4734, "step": 1587 }, { "epoch": 5.475862068965517, "grad_norm": 1.2099387645721436, "learning_rate": 6.5586206896551725e-06, "loss": 0.4786, "step": 1588 }, { "epoch": 5.479310344827586, "grad_norm": 1.202226161956787, "learning_rate": 6.562758620689655e-06, "loss": 0.5024, "step": 1589 }, { "epoch": 5.482758620689655, "grad_norm": 1.1006178855895996, "learning_rate": 6.566896551724138e-06, "loss": 0.4745, "step": 1590 }, { "epoch": 5.4862068965517246, "grad_norm": 1.4490561485290527, "learning_rate": 6.571034482758621e-06, "loss": 0.4666, "step": 1591 }, { "epoch": 5.489655172413793, "grad_norm": 1.3825790882110596, "learning_rate": 6.575172413793104e-06, "loss": 0.5188, "step": 1592 }, { "epoch": 5.493103448275862, "grad_norm": 1.330759882926941, "learning_rate": 6.579310344827587e-06, "loss": 0.4543, "step": 1593 }, { "epoch": 5.496551724137931, "grad_norm": 1.4413869380950928, "learning_rate": 6.583448275862069e-06, "loss": 0.4911, "step": 1594 }, { "epoch": 5.5, "grad_norm": 1.3808039426803589, "learning_rate": 6.587586206896552e-06, "loss": 0.4642, "step": 1595 }, { "epoch": 5.503448275862069, "grad_norm": 1.3872556686401367, "learning_rate": 6.591724137931035e-06, "loss": 0.5241, "step": 1596 }, { "epoch": 5.506896551724138, "grad_norm": 1.7345777750015259, "learning_rate": 6.5958620689655175e-06, "loss": 0.4685, "step": 1597 }, { "epoch": 5.510344827586207, "grad_norm": 3.4457850456237793, "learning_rate": 6.6e-06, "loss": 0.5207, "step": 1598 }, { "epoch": 5.5137931034482754, "grad_norm": 2.298640489578247, "learning_rate": 6.604137931034483e-06, "loss": 0.5785, "step": 1599 }, { "epoch": 5.517241379310345, "grad_norm": 5.317802906036377, "learning_rate": 6.608275862068966e-06, "loss": 0.6701, "step": 1600 }, { "epoch": 5.520689655172414, "grad_norm": 1.3962513208389282, "learning_rate": 6.612413793103448e-06, "loss": 0.4986, "step": 1601 }, { "epoch": 5.524137931034483, "grad_norm": 0.8578850626945496, "learning_rate": 6.616551724137931e-06, "loss": 0.4574, "step": 1602 }, { "epoch": 5.527586206896552, "grad_norm": 1.201900839805603, "learning_rate": 6.620689655172414e-06, "loss": 0.4922, "step": 1603 }, { "epoch": 5.531034482758621, "grad_norm": 2.651052713394165, "learning_rate": 6.624827586206897e-06, "loss": 0.501, "step": 1604 }, { "epoch": 5.5344827586206895, "grad_norm": 1.1641134023666382, "learning_rate": 6.6289655172413795e-06, "loss": 0.4545, "step": 1605 }, { "epoch": 5.537931034482758, "grad_norm": 1.039204478263855, "learning_rate": 6.633103448275862e-06, "loss": 0.4402, "step": 1606 }, { "epoch": 5.541379310344827, "grad_norm": 1.0046738386154175, "learning_rate": 6.6372413793103444e-06, "loss": 0.4906, "step": 1607 }, { "epoch": 5.544827586206896, "grad_norm": 1.3948115110397339, "learning_rate": 6.641379310344827e-06, "loss": 0.4453, "step": 1608 }, { "epoch": 5.548275862068966, "grad_norm": 1.4555721282958984, "learning_rate": 6.64551724137931e-06, "loss": 0.4701, "step": 1609 }, { "epoch": 5.551724137931035, "grad_norm": 1.589053988456726, "learning_rate": 6.649655172413793e-06, "loss": 0.4756, "step": 1610 }, { "epoch": 5.555172413793104, "grad_norm": 1.06350839138031, "learning_rate": 6.653793103448276e-06, "loss": 0.4872, "step": 1611 }, { "epoch": 5.558620689655172, "grad_norm": 1.2251931428909302, "learning_rate": 6.657931034482759e-06, "loss": 0.4518, "step": 1612 }, { "epoch": 5.562068965517241, "grad_norm": 1.1508753299713135, "learning_rate": 6.662068965517242e-06, "loss": 0.4882, "step": 1613 }, { "epoch": 5.56551724137931, "grad_norm": 1.0915638208389282, "learning_rate": 6.666206896551724e-06, "loss": 0.4282, "step": 1614 }, { "epoch": 5.568965517241379, "grad_norm": 1.597983479499817, "learning_rate": 6.6703448275862065e-06, "loss": 0.491, "step": 1615 }, { "epoch": 5.572413793103449, "grad_norm": 1.3601558208465576, "learning_rate": 6.67448275862069e-06, "loss": 0.5329, "step": 1616 }, { "epoch": 5.575862068965518, "grad_norm": 1.7035250663757324, "learning_rate": 6.678620689655173e-06, "loss": 0.5095, "step": 1617 }, { "epoch": 5.5793103448275865, "grad_norm": 1.538737416267395, "learning_rate": 6.682758620689656e-06, "loss": 0.4699, "step": 1618 }, { "epoch": 5.582758620689655, "grad_norm": 1.52659273147583, "learning_rate": 6.686896551724139e-06, "loss": 0.5042, "step": 1619 }, { "epoch": 5.586206896551724, "grad_norm": 1.9247509241104126, "learning_rate": 6.691034482758621e-06, "loss": 0.4715, "step": 1620 }, { "epoch": 5.589655172413793, "grad_norm": 1.563853144645691, "learning_rate": 6.695172413793104e-06, "loss": 0.5082, "step": 1621 }, { "epoch": 5.593103448275862, "grad_norm": 1.4537099599838257, "learning_rate": 6.6993103448275866e-06, "loss": 0.5428, "step": 1622 }, { "epoch": 5.596551724137931, "grad_norm": 2.2288246154785156, "learning_rate": 6.703448275862069e-06, "loss": 0.5522, "step": 1623 }, { "epoch": 5.6, "grad_norm": 1.6224571466445923, "learning_rate": 6.707586206896552e-06, "loss": 0.5641, "step": 1624 }, { "epoch": 5.603448275862069, "grad_norm": 2.7171945571899414, "learning_rate": 6.711724137931035e-06, "loss": 0.7125, "step": 1625 }, { "epoch": 5.606896551724138, "grad_norm": 1.0552915334701538, "learning_rate": 6.715862068965517e-06, "loss": 0.5164, "step": 1626 }, { "epoch": 5.610344827586207, "grad_norm": 1.429757833480835, "learning_rate": 6.72e-06, "loss": 0.4895, "step": 1627 }, { "epoch": 5.613793103448276, "grad_norm": 1.0782626867294312, "learning_rate": 6.724137931034483e-06, "loss": 0.4542, "step": 1628 }, { "epoch": 5.617241379310345, "grad_norm": 0.9507354497909546, "learning_rate": 6.728275862068966e-06, "loss": 0.4643, "step": 1629 }, { "epoch": 5.620689655172414, "grad_norm": 1.162697434425354, "learning_rate": 6.732413793103449e-06, "loss": 0.4788, "step": 1630 }, { "epoch": 5.624137931034483, "grad_norm": 1.3973172903060913, "learning_rate": 6.7365517241379315e-06, "loss": 0.4944, "step": 1631 }, { "epoch": 5.627586206896551, "grad_norm": 1.2123847007751465, "learning_rate": 6.740689655172414e-06, "loss": 0.4599, "step": 1632 }, { "epoch": 5.63103448275862, "grad_norm": 1.273041844367981, "learning_rate": 6.744827586206896e-06, "loss": 0.46, "step": 1633 }, { "epoch": 5.63448275862069, "grad_norm": 2.0562658309936523, "learning_rate": 6.748965517241379e-06, "loss": 0.4781, "step": 1634 }, { "epoch": 5.637931034482759, "grad_norm": 1.3839462995529175, "learning_rate": 6.753103448275862e-06, "loss": 0.4422, "step": 1635 }, { "epoch": 5.641379310344828, "grad_norm": 1.1577547788619995, "learning_rate": 6.757241379310345e-06, "loss": 0.4618, "step": 1636 }, { "epoch": 5.644827586206897, "grad_norm": 1.2609241008758545, "learning_rate": 6.761379310344828e-06, "loss": 0.4769, "step": 1637 }, { "epoch": 5.6482758620689655, "grad_norm": 1.7298095226287842, "learning_rate": 6.765517241379311e-06, "loss": 0.4646, "step": 1638 }, { "epoch": 5.651724137931034, "grad_norm": 1.2041022777557373, "learning_rate": 6.769655172413793e-06, "loss": 0.482, "step": 1639 }, { "epoch": 5.655172413793103, "grad_norm": 3.407543182373047, "learning_rate": 6.773793103448276e-06, "loss": 0.5051, "step": 1640 }, { "epoch": 5.658620689655172, "grad_norm": 1.4929453134536743, "learning_rate": 6.7779310344827585e-06, "loss": 0.4478, "step": 1641 }, { "epoch": 5.662068965517241, "grad_norm": 1.3337253332138062, "learning_rate": 6.782068965517241e-06, "loss": 0.4964, "step": 1642 }, { "epoch": 5.665517241379311, "grad_norm": 1.5346611738204956, "learning_rate": 6.786206896551724e-06, "loss": 0.4712, "step": 1643 }, { "epoch": 5.6689655172413795, "grad_norm": 1.5164096355438232, "learning_rate": 6.790344827586207e-06, "loss": 0.4791, "step": 1644 }, { "epoch": 5.672413793103448, "grad_norm": 1.445085883140564, "learning_rate": 6.79448275862069e-06, "loss": 0.4521, "step": 1645 }, { "epoch": 5.675862068965517, "grad_norm": 1.673592209815979, "learning_rate": 6.798620689655172e-06, "loss": 0.5503, "step": 1646 }, { "epoch": 5.679310344827586, "grad_norm": 1.7134473323822021, "learning_rate": 6.802758620689655e-06, "loss": 0.4795, "step": 1647 }, { "epoch": 5.682758620689655, "grad_norm": 1.7591533660888672, "learning_rate": 6.806896551724138e-06, "loss": 0.5201, "step": 1648 }, { "epoch": 5.686206896551724, "grad_norm": 2.658745527267456, "learning_rate": 6.8110344827586205e-06, "loss": 0.5672, "step": 1649 }, { "epoch": 5.689655172413794, "grad_norm": 3.0621469020843506, "learning_rate": 6.815172413793104e-06, "loss": 0.687, "step": 1650 }, { "epoch": 5.6931034482758625, "grad_norm": 1.2867529392242432, "learning_rate": 6.819310344827587e-06, "loss": 0.5506, "step": 1651 }, { "epoch": 5.696551724137931, "grad_norm": 0.9462665915489197, "learning_rate": 6.823448275862069e-06, "loss": 0.4609, "step": 1652 }, { "epoch": 5.7, "grad_norm": 0.8676013946533203, "learning_rate": 6.827586206896552e-06, "loss": 0.462, "step": 1653 }, { "epoch": 5.703448275862069, "grad_norm": 1.2125016450881958, "learning_rate": 6.831724137931035e-06, "loss": 0.4265, "step": 1654 }, { "epoch": 5.706896551724138, "grad_norm": 1.4527184963226318, "learning_rate": 6.835862068965518e-06, "loss": 0.4412, "step": 1655 }, { "epoch": 5.710344827586207, "grad_norm": 0.9702368378639221, "learning_rate": 6.840000000000001e-06, "loss": 0.4355, "step": 1656 }, { "epoch": 5.713793103448276, "grad_norm": 1.1803582906723022, "learning_rate": 6.8441379310344834e-06, "loss": 0.4642, "step": 1657 }, { "epoch": 5.7172413793103445, "grad_norm": 1.4377926588058472, "learning_rate": 6.848275862068966e-06, "loss": 0.4485, "step": 1658 }, { "epoch": 5.720689655172414, "grad_norm": 1.5575882196426392, "learning_rate": 6.852413793103448e-06, "loss": 0.4591, "step": 1659 }, { "epoch": 5.724137931034483, "grad_norm": 1.4030224084854126, "learning_rate": 6.856551724137931e-06, "loss": 0.4566, "step": 1660 }, { "epoch": 5.727586206896552, "grad_norm": 4.2885212898254395, "learning_rate": 6.860689655172414e-06, "loss": 0.4811, "step": 1661 }, { "epoch": 5.731034482758621, "grad_norm": 1.1010860204696655, "learning_rate": 6.864827586206897e-06, "loss": 0.4349, "step": 1662 }, { "epoch": 5.73448275862069, "grad_norm": 1.248012900352478, "learning_rate": 6.86896551724138e-06, "loss": 0.4499, "step": 1663 }, { "epoch": 5.7379310344827585, "grad_norm": 1.2237502336502075, "learning_rate": 6.873103448275863e-06, "loss": 0.466, "step": 1664 }, { "epoch": 5.741379310344827, "grad_norm": 2.315552234649658, "learning_rate": 6.877241379310345e-06, "loss": 0.4827, "step": 1665 }, { "epoch": 5.744827586206896, "grad_norm": 1.1323063373565674, "learning_rate": 6.8813793103448275e-06, "loss": 0.4401, "step": 1666 }, { "epoch": 5.748275862068965, "grad_norm": 1.5824939012527466, "learning_rate": 6.88551724137931e-06, "loss": 0.4547, "step": 1667 }, { "epoch": 5.751724137931035, "grad_norm": 1.6498733758926392, "learning_rate": 6.889655172413793e-06, "loss": 0.4765, "step": 1668 }, { "epoch": 5.755172413793104, "grad_norm": 1.293576955795288, "learning_rate": 6.893793103448276e-06, "loss": 0.4904, "step": 1669 }, { "epoch": 5.758620689655173, "grad_norm": 1.8415969610214233, "learning_rate": 6.897931034482759e-06, "loss": 0.5317, "step": 1670 }, { "epoch": 5.7620689655172415, "grad_norm": 1.3655877113342285, "learning_rate": 6.902068965517241e-06, "loss": 0.5109, "step": 1671 }, { "epoch": 5.76551724137931, "grad_norm": 1.982765793800354, "learning_rate": 6.906206896551724e-06, "loss": 0.4852, "step": 1672 }, { "epoch": 5.768965517241379, "grad_norm": 1.8212971687316895, "learning_rate": 6.910344827586207e-06, "loss": 0.5531, "step": 1673 }, { "epoch": 5.772413793103448, "grad_norm": 2.090747356414795, "learning_rate": 6.91448275862069e-06, "loss": 0.5148, "step": 1674 }, { "epoch": 5.775862068965517, "grad_norm": 3.199458122253418, "learning_rate": 6.9186206896551725e-06, "loss": 0.6538, "step": 1675 }, { "epoch": 5.779310344827586, "grad_norm": 1.9485232830047607, "learning_rate": 6.922758620689655e-06, "loss": 0.4825, "step": 1676 }, { "epoch": 5.7827586206896555, "grad_norm": 1.0584142208099365, "learning_rate": 6.926896551724138e-06, "loss": 0.4734, "step": 1677 }, { "epoch": 5.786206896551724, "grad_norm": 1.171372890472412, "learning_rate": 6.93103448275862e-06, "loss": 0.4336, "step": 1678 }, { "epoch": 5.789655172413793, "grad_norm": 2.2803609371185303, "learning_rate": 6.935172413793103e-06, "loss": 0.4746, "step": 1679 }, { "epoch": 5.793103448275862, "grad_norm": 1.3822022676467896, "learning_rate": 6.939310344827586e-06, "loss": 0.502, "step": 1680 }, { "epoch": 5.796551724137931, "grad_norm": 1.1019196510314941, "learning_rate": 6.943448275862069e-06, "loss": 0.504, "step": 1681 }, { "epoch": 5.8, "grad_norm": 1.0820362567901611, "learning_rate": 6.947586206896552e-06, "loss": 0.5073, "step": 1682 }, { "epoch": 5.803448275862069, "grad_norm": 1.6049963235855103, "learning_rate": 6.951724137931035e-06, "loss": 0.4799, "step": 1683 }, { "epoch": 5.8068965517241375, "grad_norm": 1.6276623010635376, "learning_rate": 6.955862068965517e-06, "loss": 0.4368, "step": 1684 }, { "epoch": 5.810344827586206, "grad_norm": 1.2430660724639893, "learning_rate": 6.96e-06, "loss": 0.4335, "step": 1685 }, { "epoch": 5.813793103448276, "grad_norm": 1.2945219278335571, "learning_rate": 6.964137931034483e-06, "loss": 0.4284, "step": 1686 }, { "epoch": 5.817241379310345, "grad_norm": 1.534535527229309, "learning_rate": 6.968275862068966e-06, "loss": 0.4384, "step": 1687 }, { "epoch": 5.820689655172414, "grad_norm": 1.7102687358856201, "learning_rate": 6.972413793103449e-06, "loss": 0.448, "step": 1688 }, { "epoch": 5.824137931034483, "grad_norm": 1.332966685295105, "learning_rate": 6.976551724137932e-06, "loss": 0.4844, "step": 1689 }, { "epoch": 5.827586206896552, "grad_norm": 2.879549503326416, "learning_rate": 6.980689655172415e-06, "loss": 0.4801, "step": 1690 }, { "epoch": 5.8310344827586205, "grad_norm": 1.591391921043396, "learning_rate": 6.984827586206897e-06, "loss": 0.4183, "step": 1691 }, { "epoch": 5.834482758620689, "grad_norm": 1.4303638935089111, "learning_rate": 6.9889655172413795e-06, "loss": 0.4865, "step": 1692 }, { "epoch": 5.837931034482759, "grad_norm": 1.276718258857727, "learning_rate": 6.993103448275862e-06, "loss": 0.4615, "step": 1693 }, { "epoch": 5.841379310344828, "grad_norm": 1.7517262697219849, "learning_rate": 6.997241379310345e-06, "loss": 0.4783, "step": 1694 }, { "epoch": 5.844827586206897, "grad_norm": 1.4499080181121826, "learning_rate": 7.001379310344828e-06, "loss": 0.4858, "step": 1695 }, { "epoch": 5.848275862068966, "grad_norm": 2.254622459411621, "learning_rate": 7.005517241379311e-06, "loss": 0.4947, "step": 1696 }, { "epoch": 5.8517241379310345, "grad_norm": 2.4839229583740234, "learning_rate": 7.009655172413793e-06, "loss": 0.4911, "step": 1697 }, { "epoch": 5.855172413793103, "grad_norm": 1.7296457290649414, "learning_rate": 7.013793103448276e-06, "loss": 0.503, "step": 1698 }, { "epoch": 5.858620689655172, "grad_norm": 1.5393643379211426, "learning_rate": 7.017931034482759e-06, "loss": 0.5383, "step": 1699 }, { "epoch": 5.862068965517241, "grad_norm": 7.347292423248291, "learning_rate": 7.0220689655172416e-06, "loss": 0.7376, "step": 1700 }, { "epoch": 5.86551724137931, "grad_norm": 1.0987082719802856, "learning_rate": 7.0262068965517244e-06, "loss": 0.5159, "step": 1701 }, { "epoch": 5.86896551724138, "grad_norm": 1.147261381149292, "learning_rate": 7.030344827586207e-06, "loss": 0.4717, "step": 1702 }, { "epoch": 5.872413793103449, "grad_norm": 3.429628610610962, "learning_rate": 7.03448275862069e-06, "loss": 0.4584, "step": 1703 }, { "epoch": 5.875862068965517, "grad_norm": 0.9676830172538757, "learning_rate": 7.038620689655172e-06, "loss": 0.45, "step": 1704 }, { "epoch": 5.879310344827586, "grad_norm": 0.9680139422416687, "learning_rate": 7.042758620689655e-06, "loss": 0.4701, "step": 1705 }, { "epoch": 5.882758620689655, "grad_norm": 1.1813455820083618, "learning_rate": 7.046896551724138e-06, "loss": 0.4319, "step": 1706 }, { "epoch": 5.886206896551724, "grad_norm": 1.1272571086883545, "learning_rate": 7.051034482758621e-06, "loss": 0.4289, "step": 1707 }, { "epoch": 5.889655172413793, "grad_norm": 1.2005163431167603, "learning_rate": 7.055172413793104e-06, "loss": 0.4704, "step": 1708 }, { "epoch": 5.893103448275862, "grad_norm": 1.7335726022720337, "learning_rate": 7.0593103448275865e-06, "loss": 0.4182, "step": 1709 }, { "epoch": 5.896551724137931, "grad_norm": 1.3383119106292725, "learning_rate": 7.0634482758620685e-06, "loss": 0.4552, "step": 1710 }, { "epoch": 5.9, "grad_norm": 1.047334909439087, "learning_rate": 7.067586206896551e-06, "loss": 0.4265, "step": 1711 }, { "epoch": 5.903448275862069, "grad_norm": 1.5752679109573364, "learning_rate": 7.071724137931034e-06, "loss": 0.5382, "step": 1712 }, { "epoch": 5.906896551724138, "grad_norm": 1.3596680164337158, "learning_rate": 7.075862068965517e-06, "loss": 0.4483, "step": 1713 }, { "epoch": 5.910344827586207, "grad_norm": 1.5484777688980103, "learning_rate": 7.08e-06, "loss": 0.492, "step": 1714 }, { "epoch": 5.913793103448276, "grad_norm": 1.65871000289917, "learning_rate": 7.084137931034483e-06, "loss": 0.4561, "step": 1715 }, { "epoch": 5.917241379310345, "grad_norm": 1.6230247020721436, "learning_rate": 7.088275862068966e-06, "loss": 0.5196, "step": 1716 }, { "epoch": 5.9206896551724135, "grad_norm": 1.6984248161315918, "learning_rate": 7.0924137931034486e-06, "loss": 0.4453, "step": 1717 }, { "epoch": 5.924137931034482, "grad_norm": 1.4393740892410278, "learning_rate": 7.0965517241379314e-06, "loss": 0.5142, "step": 1718 }, { "epoch": 5.927586206896551, "grad_norm": 2.118729591369629, "learning_rate": 7.100689655172414e-06, "loss": 0.4735, "step": 1719 }, { "epoch": 5.931034482758621, "grad_norm": 1.8276396989822388, "learning_rate": 7.104827586206897e-06, "loss": 0.4691, "step": 1720 }, { "epoch": 5.93448275862069, "grad_norm": 1.226006031036377, "learning_rate": 7.10896551724138e-06, "loss": 0.4915, "step": 1721 }, { "epoch": 5.937931034482759, "grad_norm": 1.5874773263931274, "learning_rate": 7.113103448275863e-06, "loss": 0.4621, "step": 1722 }, { "epoch": 5.941379310344828, "grad_norm": 2.3777639865875244, "learning_rate": 7.117241379310345e-06, "loss": 0.4723, "step": 1723 }, { "epoch": 5.944827586206896, "grad_norm": 3.2980878353118896, "learning_rate": 7.121379310344828e-06, "loss": 0.5444, "step": 1724 }, { "epoch": 5.948275862068965, "grad_norm": 3.293149948120117, "learning_rate": 7.125517241379311e-06, "loss": 0.6671, "step": 1725 }, { "epoch": 5.951724137931034, "grad_norm": 1.0123192071914673, "learning_rate": 7.1296551724137935e-06, "loss": 0.4744, "step": 1726 }, { "epoch": 5.955172413793104, "grad_norm": 2.6550991535186768, "learning_rate": 7.133793103448276e-06, "loss": 0.4621, "step": 1727 }, { "epoch": 5.958620689655173, "grad_norm": 1.626969814300537, "learning_rate": 7.137931034482759e-06, "loss": 0.428, "step": 1728 }, { "epoch": 5.962068965517242, "grad_norm": 1.4667750597000122, "learning_rate": 7.142068965517241e-06, "loss": 0.4806, "step": 1729 }, { "epoch": 5.9655172413793105, "grad_norm": 1.657148838043213, "learning_rate": 7.146206896551724e-06, "loss": 0.4555, "step": 1730 }, { "epoch": 5.968965517241379, "grad_norm": 1.4756993055343628, "learning_rate": 7.150344827586207e-06, "loss": 0.4397, "step": 1731 }, { "epoch": 5.972413793103448, "grad_norm": 1.3104209899902344, "learning_rate": 7.15448275862069e-06, "loss": 0.4641, "step": 1732 }, { "epoch": 5.975862068965517, "grad_norm": 2.283639669418335, "learning_rate": 7.158620689655173e-06, "loss": 0.4449, "step": 1733 }, { "epoch": 5.979310344827586, "grad_norm": 1.8499162197113037, "learning_rate": 7.162758620689656e-06, "loss": 0.4934, "step": 1734 }, { "epoch": 5.982758620689655, "grad_norm": 1.7131431102752686, "learning_rate": 7.1668965517241384e-06, "loss": 0.4964, "step": 1735 }, { "epoch": 5.9862068965517246, "grad_norm": 1.3956177234649658, "learning_rate": 7.1710344827586205e-06, "loss": 0.4292, "step": 1736 }, { "epoch": 5.989655172413793, "grad_norm": 1.827849268913269, "learning_rate": 7.175172413793103e-06, "loss": 0.5145, "step": 1737 }, { "epoch": 5.993103448275862, "grad_norm": 12.623188972473145, "learning_rate": 7.179310344827586e-06, "loss": 0.523, "step": 1738 }, { "epoch": 5.996551724137931, "grad_norm": 2.150559663772583, "learning_rate": 7.183448275862069e-06, "loss": 0.5182, "step": 1739 }, { "epoch": 6.0, "grad_norm": 2.8661959171295166, "learning_rate": 7.187586206896552e-06, "loss": 0.6173, "step": 1740 }, { "epoch": 6.003448275862069, "grad_norm": 1.2588483095169067, "learning_rate": 7.191724137931035e-06, "loss": 0.5171, "step": 1741 }, { "epoch": 6.006896551724138, "grad_norm": 12.174965858459473, "learning_rate": 7.195862068965517e-06, "loss": 0.4253, "step": 1742 }, { "epoch": 6.010344827586207, "grad_norm": 1.9217333793640137, "learning_rate": 7.2e-06, "loss": 0.4424, "step": 1743 }, { "epoch": 6.0137931034482754, "grad_norm": 1.6172150373458862, "learning_rate": 7.2041379310344825e-06, "loss": 0.4475, "step": 1744 }, { "epoch": 6.017241379310345, "grad_norm": 1.144660234451294, "learning_rate": 7.208275862068965e-06, "loss": 0.4244, "step": 1745 }, { "epoch": 6.020689655172414, "grad_norm": 1.154052972793579, "learning_rate": 7.212413793103448e-06, "loss": 0.4264, "step": 1746 }, { "epoch": 6.024137931034483, "grad_norm": 1.6172412633895874, "learning_rate": 7.216551724137931e-06, "loss": 0.477, "step": 1747 }, { "epoch": 6.027586206896552, "grad_norm": 1.2110612392425537, "learning_rate": 7.220689655172414e-06, "loss": 0.4643, "step": 1748 }, { "epoch": 6.031034482758621, "grad_norm": 1.0927765369415283, "learning_rate": 7.224827586206897e-06, "loss": 0.4183, "step": 1749 }, { "epoch": 6.0344827586206895, "grad_norm": 1.1982754468917847, "learning_rate": 7.22896551724138e-06, "loss": 0.4505, "step": 1750 }, { "epoch": 6.037931034482758, "grad_norm": 1.26279878616333, "learning_rate": 7.233103448275863e-06, "loss": 0.4546, "step": 1751 }, { "epoch": 6.041379310344827, "grad_norm": 1.4373584985733032, "learning_rate": 7.2372413793103455e-06, "loss": 0.4429, "step": 1752 }, { "epoch": 6.044827586206897, "grad_norm": 1.0653109550476074, "learning_rate": 7.241379310344828e-06, "loss": 0.4675, "step": 1753 }, { "epoch": 6.048275862068966, "grad_norm": 1.0887954235076904, "learning_rate": 7.245517241379311e-06, "loss": 0.4371, "step": 1754 }, { "epoch": 6.051724137931035, "grad_norm": 1.3239058256149292, "learning_rate": 7.249655172413793e-06, "loss": 0.4597, "step": 1755 }, { "epoch": 6.055172413793104, "grad_norm": 1.3109138011932373, "learning_rate": 7.253793103448276e-06, "loss": 0.4497, "step": 1756 }, { "epoch": 6.058620689655172, "grad_norm": 1.4553513526916504, "learning_rate": 7.257931034482759e-06, "loss": 0.4541, "step": 1757 }, { "epoch": 6.062068965517241, "grad_norm": 2.7103683948516846, "learning_rate": 7.262068965517242e-06, "loss": 0.4659, "step": 1758 }, { "epoch": 6.06551724137931, "grad_norm": 1.4639155864715576, "learning_rate": 7.266206896551725e-06, "loss": 0.4585, "step": 1759 }, { "epoch": 6.068965517241379, "grad_norm": 1.464715600013733, "learning_rate": 7.2703448275862075e-06, "loss": 0.468, "step": 1760 }, { "epoch": 6.072413793103448, "grad_norm": 2.106896162033081, "learning_rate": 7.2744827586206896e-06, "loss": 0.4863, "step": 1761 }, { "epoch": 6.075862068965518, "grad_norm": 1.4718941450119019, "learning_rate": 7.278620689655172e-06, "loss": 0.4648, "step": 1762 }, { "epoch": 6.0793103448275865, "grad_norm": 6.951113224029541, "learning_rate": 7.282758620689655e-06, "loss": 0.5351, "step": 1763 }, { "epoch": 6.082758620689655, "grad_norm": 1.9060499668121338, "learning_rate": 7.286896551724138e-06, "loss": 0.5694, "step": 1764 }, { "epoch": 6.086206896551724, "grad_norm": 2.5222835540771484, "learning_rate": 7.291034482758621e-06, "loss": 0.6622, "step": 1765 }, { "epoch": 6.089655172413793, "grad_norm": 1.3859436511993408, "learning_rate": 7.295172413793104e-06, "loss": 0.531, "step": 1766 }, { "epoch": 6.093103448275862, "grad_norm": 0.9988785982131958, "learning_rate": 7.299310344827587e-06, "loss": 0.4492, "step": 1767 }, { "epoch": 6.096551724137931, "grad_norm": 1.1429290771484375, "learning_rate": 7.303448275862069e-06, "loss": 0.43, "step": 1768 }, { "epoch": 6.1, "grad_norm": 1.5116466283798218, "learning_rate": 7.307586206896552e-06, "loss": 0.4508, "step": 1769 }, { "epoch": 6.103448275862069, "grad_norm": 1.1572203636169434, "learning_rate": 7.3117241379310345e-06, "loss": 0.4263, "step": 1770 }, { "epoch": 6.106896551724138, "grad_norm": 1.057957649230957, "learning_rate": 7.315862068965517e-06, "loss": 0.4637, "step": 1771 }, { "epoch": 6.110344827586207, "grad_norm": 1.4771660566329956, "learning_rate": 7.32e-06, "loss": 0.4152, "step": 1772 }, { "epoch": 6.113793103448276, "grad_norm": 1.3904917240142822, "learning_rate": 7.324137931034483e-06, "loss": 0.4586, "step": 1773 }, { "epoch": 6.117241379310345, "grad_norm": 1.1829544305801392, "learning_rate": 7.328275862068965e-06, "loss": 0.4061, "step": 1774 }, { "epoch": 6.120689655172414, "grad_norm": 1.3177804946899414, "learning_rate": 7.332413793103448e-06, "loss": 0.4135, "step": 1775 }, { "epoch": 6.124137931034483, "grad_norm": 1.2885785102844238, "learning_rate": 7.336551724137931e-06, "loss": 0.4754, "step": 1776 }, { "epoch": 6.127586206896551, "grad_norm": 1.2926145792007446, "learning_rate": 7.340689655172414e-06, "loss": 0.453, "step": 1777 }, { "epoch": 6.13103448275862, "grad_norm": 1.1650419235229492, "learning_rate": 7.3448275862068966e-06, "loss": 0.4234, "step": 1778 }, { "epoch": 6.13448275862069, "grad_norm": 1.5440806150436401, "learning_rate": 7.3489655172413794e-06, "loss": 0.3786, "step": 1779 }, { "epoch": 6.137931034482759, "grad_norm": 3.33648943901062, "learning_rate": 7.353103448275862e-06, "loss": 0.5134, "step": 1780 }, { "epoch": 6.141379310344828, "grad_norm": 1.4153822660446167, "learning_rate": 7.357241379310344e-06, "loss": 0.4363, "step": 1781 }, { "epoch": 6.144827586206897, "grad_norm": 1.724287509918213, "learning_rate": 7.361379310344827e-06, "loss": 0.411, "step": 1782 }, { "epoch": 6.1482758620689655, "grad_norm": 1.4622722864151, "learning_rate": 7.365517241379311e-06, "loss": 0.4554, "step": 1783 }, { "epoch": 6.151724137931034, "grad_norm": 1.7076319456100464, "learning_rate": 7.369655172413794e-06, "loss": 0.4715, "step": 1784 }, { "epoch": 6.155172413793103, "grad_norm": 2.215895652770996, "learning_rate": 7.373793103448277e-06, "loss": 0.4985, "step": 1785 }, { "epoch": 6.158620689655172, "grad_norm": 1.3390394449234009, "learning_rate": 7.3779310344827595e-06, "loss": 0.5082, "step": 1786 }, { "epoch": 6.162068965517241, "grad_norm": 1.8730783462524414, "learning_rate": 7.3820689655172415e-06, "loss": 0.4886, "step": 1787 }, { "epoch": 6.165517241379311, "grad_norm": 1.3590954542160034, "learning_rate": 7.386206896551724e-06, "loss": 0.5463, "step": 1788 }, { "epoch": 6.1689655172413795, "grad_norm": 2.6106252670288086, "learning_rate": 7.390344827586207e-06, "loss": 0.5776, "step": 1789 }, { "epoch": 6.172413793103448, "grad_norm": 3.7982916831970215, "learning_rate": 7.39448275862069e-06, "loss": 0.6232, "step": 1790 }, { "epoch": 6.175862068965517, "grad_norm": 1.8319096565246582, "learning_rate": 7.398620689655173e-06, "loss": 0.5456, "step": 1791 }, { "epoch": 6.179310344827586, "grad_norm": 1.000929594039917, "learning_rate": 7.402758620689656e-06, "loss": 0.4825, "step": 1792 }, { "epoch": 6.182758620689655, "grad_norm": 0.9324603080749512, "learning_rate": 7.406896551724138e-06, "loss": 0.4171, "step": 1793 }, { "epoch": 6.186206896551724, "grad_norm": 1.1726646423339844, "learning_rate": 7.411034482758621e-06, "loss": 0.4191, "step": 1794 }, { "epoch": 6.189655172413793, "grad_norm": 1.279012680053711, "learning_rate": 7.4151724137931036e-06, "loss": 0.444, "step": 1795 }, { "epoch": 6.1931034482758625, "grad_norm": 1.0217193365097046, "learning_rate": 7.4193103448275864e-06, "loss": 0.4392, "step": 1796 }, { "epoch": 6.196551724137931, "grad_norm": 1.4255043268203735, "learning_rate": 7.423448275862069e-06, "loss": 0.4387, "step": 1797 }, { "epoch": 6.2, "grad_norm": 1.6051775217056274, "learning_rate": 7.427586206896552e-06, "loss": 0.4537, "step": 1798 }, { "epoch": 6.203448275862069, "grad_norm": 1.2342811822891235, "learning_rate": 7.431724137931035e-06, "loss": 0.4358, "step": 1799 }, { "epoch": 6.206896551724138, "grad_norm": 1.0355534553527832, "learning_rate": 7.435862068965517e-06, "loss": 0.4194, "step": 1800 }, { "epoch": 6.210344827586207, "grad_norm": 1.932559847831726, "learning_rate": 7.44e-06, "loss": 0.4427, "step": 1801 }, { "epoch": 6.213793103448276, "grad_norm": 1.5107063055038452, "learning_rate": 7.444137931034483e-06, "loss": 0.4699, "step": 1802 }, { "epoch": 6.2172413793103445, "grad_norm": 1.33786141872406, "learning_rate": 7.448275862068966e-06, "loss": 0.4004, "step": 1803 }, { "epoch": 6.220689655172414, "grad_norm": 1.4253383874893188, "learning_rate": 7.4524137931034485e-06, "loss": 0.454, "step": 1804 }, { "epoch": 6.224137931034483, "grad_norm": 1.094265103340149, "learning_rate": 7.456551724137931e-06, "loss": 0.4165, "step": 1805 }, { "epoch": 6.227586206896552, "grad_norm": 1.199282169342041, "learning_rate": 7.460689655172413e-06, "loss": 0.4568, "step": 1806 }, { "epoch": 6.231034482758621, "grad_norm": 6.772141933441162, "learning_rate": 7.464827586206896e-06, "loss": 0.428, "step": 1807 }, { "epoch": 6.23448275862069, "grad_norm": 1.5948495864868164, "learning_rate": 7.468965517241379e-06, "loss": 0.4423, "step": 1808 }, { "epoch": 6.2379310344827585, "grad_norm": 1.9637757539749146, "learning_rate": 7.473103448275862e-06, "loss": 0.4434, "step": 1809 }, { "epoch": 6.241379310344827, "grad_norm": 1.9107508659362793, "learning_rate": 7.477241379310345e-06, "loss": 0.4504, "step": 1810 }, { "epoch": 6.244827586206896, "grad_norm": 1.3942897319793701, "learning_rate": 7.481379310344828e-06, "loss": 0.4386, "step": 1811 }, { "epoch": 6.248275862068965, "grad_norm": 2.102928638458252, "learning_rate": 7.485517241379311e-06, "loss": 0.4963, "step": 1812 }, { "epoch": 6.251724137931035, "grad_norm": 2.523333787918091, "learning_rate": 7.489655172413793e-06, "loss": 0.4701, "step": 1813 }, { "epoch": 6.255172413793104, "grad_norm": 1.9395320415496826, "learning_rate": 7.4937931034482755e-06, "loss": 0.526, "step": 1814 }, { "epoch": 6.258620689655173, "grad_norm": 3.1823031902313232, "learning_rate": 7.497931034482758e-06, "loss": 0.6605, "step": 1815 }, { "epoch": 6.2620689655172415, "grad_norm": 1.3177138566970825, "learning_rate": 7.502068965517242e-06, "loss": 0.5158, "step": 1816 }, { "epoch": 6.26551724137931, "grad_norm": 0.956541121006012, "learning_rate": 7.506206896551724e-06, "loss": 0.4401, "step": 1817 }, { "epoch": 6.268965517241379, "grad_norm": 1.0911511182785034, "learning_rate": 7.510344827586208e-06, "loss": 0.4518, "step": 1818 }, { "epoch": 6.272413793103448, "grad_norm": 1.0516554117202759, "learning_rate": 7.514482758620689e-06, "loss": 0.4126, "step": 1819 }, { "epoch": 6.275862068965517, "grad_norm": 1.1296405792236328, "learning_rate": 7.518620689655173e-06, "loss": 0.4046, "step": 1820 }, { "epoch": 6.279310344827586, "grad_norm": 0.9647020697593689, "learning_rate": 7.522758620689655e-06, "loss": 0.4328, "step": 1821 }, { "epoch": 6.2827586206896555, "grad_norm": 1.8128739595413208, "learning_rate": 7.526896551724138e-06, "loss": 0.4669, "step": 1822 }, { "epoch": 6.286206896551724, "grad_norm": 0.9126299619674683, "learning_rate": 7.53103448275862e-06, "loss": 0.4204, "step": 1823 }, { "epoch": 6.289655172413793, "grad_norm": 0.9990894198417664, "learning_rate": 7.535172413793104e-06, "loss": 0.4461, "step": 1824 }, { "epoch": 6.293103448275862, "grad_norm": 0.9377634525299072, "learning_rate": 7.539310344827586e-06, "loss": 0.4156, "step": 1825 }, { "epoch": 6.296551724137931, "grad_norm": 1.071236252784729, "learning_rate": 7.543448275862069e-06, "loss": 0.419, "step": 1826 }, { "epoch": 6.3, "grad_norm": 1.5957380533218384, "learning_rate": 7.547586206896551e-06, "loss": 0.4225, "step": 1827 }, { "epoch": 6.303448275862069, "grad_norm": 1.022287130355835, "learning_rate": 7.551724137931035e-06, "loss": 0.4526, "step": 1828 }, { "epoch": 6.3068965517241375, "grad_norm": 1.0816417932510376, "learning_rate": 7.555862068965517e-06, "loss": 0.4748, "step": 1829 }, { "epoch": 6.310344827586207, "grad_norm": 1.346498966217041, "learning_rate": 7.5600000000000005e-06, "loss": 0.45, "step": 1830 }, { "epoch": 6.313793103448276, "grad_norm": 1.395726203918457, "learning_rate": 7.5641379310344825e-06, "loss": 0.41, "step": 1831 }, { "epoch": 6.317241379310345, "grad_norm": 2.8851916790008545, "learning_rate": 7.568275862068965e-06, "loss": 0.439, "step": 1832 }, { "epoch": 6.320689655172414, "grad_norm": 2.471554756164551, "learning_rate": 7.572413793103449e-06, "loss": 0.4587, "step": 1833 }, { "epoch": 6.324137931034483, "grad_norm": 2.1495602130889893, "learning_rate": 7.576551724137931e-06, "loss": 0.4347, "step": 1834 }, { "epoch": 6.327586206896552, "grad_norm": 2.8960909843444824, "learning_rate": 7.580689655172415e-06, "loss": 0.4971, "step": 1835 }, { "epoch": 6.3310344827586205, "grad_norm": 1.6487430334091187, "learning_rate": 7.584827586206897e-06, "loss": 0.5044, "step": 1836 }, { "epoch": 6.334482758620689, "grad_norm": 1.5680551528930664, "learning_rate": 7.5889655172413805e-06, "loss": 0.4663, "step": 1837 }, { "epoch": 6.337931034482759, "grad_norm": 4.973691463470459, "learning_rate": 7.5931034482758625e-06, "loss": 0.4984, "step": 1838 }, { "epoch": 6.341379310344828, "grad_norm": 4.204576015472412, "learning_rate": 7.597241379310345e-06, "loss": 0.5368, "step": 1839 }, { "epoch": 6.344827586206897, "grad_norm": 4.3128662109375, "learning_rate": 7.601379310344827e-06, "loss": 0.7112, "step": 1840 }, { "epoch": 6.348275862068966, "grad_norm": 1.2111581563949585, "learning_rate": 7.605517241379311e-06, "loss": 0.5051, "step": 1841 }, { "epoch": 6.3517241379310345, "grad_norm": 1.0832321643829346, "learning_rate": 7.609655172413793e-06, "loss": 0.4339, "step": 1842 }, { "epoch": 6.355172413793103, "grad_norm": 1.000520944595337, "learning_rate": 7.613793103448277e-06, "loss": 0.4739, "step": 1843 }, { "epoch": 6.358620689655172, "grad_norm": 1.1550137996673584, "learning_rate": 7.617931034482759e-06, "loss": 0.471, "step": 1844 }, { "epoch": 6.362068965517241, "grad_norm": 0.8950406908988953, "learning_rate": 7.622068965517242e-06, "loss": 0.4151, "step": 1845 }, { "epoch": 6.36551724137931, "grad_norm": 0.8727495074272156, "learning_rate": 7.626206896551724e-06, "loss": 0.436, "step": 1846 }, { "epoch": 6.36896551724138, "grad_norm": 3.5245862007141113, "learning_rate": 7.630344827586208e-06, "loss": 0.4538, "step": 1847 }, { "epoch": 6.372413793103449, "grad_norm": 1.1518508195877075, "learning_rate": 7.634482758620689e-06, "loss": 0.4479, "step": 1848 }, { "epoch": 6.375862068965517, "grad_norm": 1.2216664552688599, "learning_rate": 7.638620689655172e-06, "loss": 0.4144, "step": 1849 }, { "epoch": 6.379310344827586, "grad_norm": 1.125431776046753, "learning_rate": 7.642758620689654e-06, "loss": 0.4266, "step": 1850 }, { "epoch": 6.382758620689655, "grad_norm": 1.1443895101547241, "learning_rate": 7.646896551724138e-06, "loss": 0.4124, "step": 1851 }, { "epoch": 6.386206896551724, "grad_norm": 1.1905760765075684, "learning_rate": 7.65103448275862e-06, "loss": 0.4625, "step": 1852 }, { "epoch": 6.389655172413793, "grad_norm": 1.7239415645599365, "learning_rate": 7.655172413793104e-06, "loss": 0.4357, "step": 1853 }, { "epoch": 6.393103448275862, "grad_norm": 1.371217966079712, "learning_rate": 7.659310344827586e-06, "loss": 0.4305, "step": 1854 }, { "epoch": 6.396551724137931, "grad_norm": 1.6212927103042603, "learning_rate": 7.66344827586207e-06, "loss": 0.4268, "step": 1855 }, { "epoch": 6.4, "grad_norm": 1.375596523284912, "learning_rate": 7.667586206896552e-06, "loss": 0.4406, "step": 1856 }, { "epoch": 6.403448275862069, "grad_norm": 1.2995448112487793, "learning_rate": 7.671724137931035e-06, "loss": 0.4094, "step": 1857 }, { "epoch": 6.406896551724138, "grad_norm": 1.5699437856674194, "learning_rate": 7.675862068965517e-06, "loss": 0.4829, "step": 1858 }, { "epoch": 6.410344827586207, "grad_norm": 1.2842278480529785, "learning_rate": 7.680000000000001e-06, "loss": 0.4683, "step": 1859 }, { "epoch": 6.413793103448276, "grad_norm": 1.6034643650054932, "learning_rate": 7.684137931034483e-06, "loss": 0.4567, "step": 1860 }, { "epoch": 6.417241379310345, "grad_norm": 1.3973926305770874, "learning_rate": 7.688275862068965e-06, "loss": 0.4939, "step": 1861 }, { "epoch": 6.4206896551724135, "grad_norm": 2.5652365684509277, "learning_rate": 7.692413793103447e-06, "loss": 0.4953, "step": 1862 }, { "epoch": 6.424137931034482, "grad_norm": 3.137583017349243, "learning_rate": 7.69655172413793e-06, "loss": 0.5267, "step": 1863 }, { "epoch": 6.427586206896552, "grad_norm": 2.8841917514801025, "learning_rate": 7.700689655172413e-06, "loss": 0.5399, "step": 1864 }, { "epoch": 6.431034482758621, "grad_norm": 2.9158432483673096, "learning_rate": 7.704827586206897e-06, "loss": 0.6919, "step": 1865 }, { "epoch": 6.43448275862069, "grad_norm": 1.1688828468322754, "learning_rate": 7.70896551724138e-06, "loss": 0.4593, "step": 1866 }, { "epoch": 6.437931034482759, "grad_norm": 0.9948346018791199, "learning_rate": 7.713103448275862e-06, "loss": 0.4255, "step": 1867 }, { "epoch": 6.441379310344828, "grad_norm": 0.901128351688385, "learning_rate": 7.717241379310346e-06, "loss": 0.4502, "step": 1868 }, { "epoch": 6.444827586206896, "grad_norm": 1.697975754737854, "learning_rate": 7.721379310344828e-06, "loss": 0.4426, "step": 1869 }, { "epoch": 6.448275862068965, "grad_norm": 1.2398977279663086, "learning_rate": 7.725517241379312e-06, "loss": 0.4562, "step": 1870 }, { "epoch": 6.451724137931034, "grad_norm": 0.9653956890106201, "learning_rate": 7.729655172413794e-06, "loss": 0.4233, "step": 1871 }, { "epoch": 6.455172413793104, "grad_norm": 0.8878656029701233, "learning_rate": 7.733793103448277e-06, "loss": 0.4518, "step": 1872 }, { "epoch": 6.458620689655173, "grad_norm": 0.9810324907302856, "learning_rate": 7.73793103448276e-06, "loss": 0.434, "step": 1873 }, { "epoch": 6.462068965517242, "grad_norm": 0.9131942391395569, "learning_rate": 7.742068965517241e-06, "loss": 0.4384, "step": 1874 }, { "epoch": 6.4655172413793105, "grad_norm": 1.1483972072601318, "learning_rate": 7.746206896551723e-06, "loss": 0.4399, "step": 1875 }, { "epoch": 6.468965517241379, "grad_norm": 1.0051722526550293, "learning_rate": 7.750344827586207e-06, "loss": 0.4405, "step": 1876 }, { "epoch": 6.472413793103448, "grad_norm": 1.305537462234497, "learning_rate": 7.75448275862069e-06, "loss": 0.4476, "step": 1877 }, { "epoch": 6.475862068965517, "grad_norm": 0.9757444858551025, "learning_rate": 7.758620689655173e-06, "loss": 0.4252, "step": 1878 }, { "epoch": 6.479310344827586, "grad_norm": 1.3771448135375977, "learning_rate": 7.762758620689655e-06, "loss": 0.4312, "step": 1879 }, { "epoch": 6.482758620689655, "grad_norm": 1.2436691522598267, "learning_rate": 7.766896551724139e-06, "loss": 0.4442, "step": 1880 }, { "epoch": 6.4862068965517246, "grad_norm": 1.2843773365020752, "learning_rate": 7.77103448275862e-06, "loss": 0.4341, "step": 1881 }, { "epoch": 6.489655172413793, "grad_norm": 1.7602401971817017, "learning_rate": 7.775172413793104e-06, "loss": 0.436, "step": 1882 }, { "epoch": 6.493103448275862, "grad_norm": 1.51511812210083, "learning_rate": 7.779310344827586e-06, "loss": 0.4548, "step": 1883 }, { "epoch": 6.496551724137931, "grad_norm": 3.2985241413116455, "learning_rate": 7.78344827586207e-06, "loss": 0.4208, "step": 1884 }, { "epoch": 6.5, "grad_norm": 1.6032986640930176, "learning_rate": 7.787586206896552e-06, "loss": 0.4884, "step": 1885 }, { "epoch": 6.503448275862069, "grad_norm": 1.5273494720458984, "learning_rate": 7.791724137931036e-06, "loss": 0.4956, "step": 1886 }, { "epoch": 6.506896551724138, "grad_norm": 1.910089135169983, "learning_rate": 7.795862068965516e-06, "loss": 0.416, "step": 1887 }, { "epoch": 6.510344827586207, "grad_norm": 1.8113442659378052, "learning_rate": 7.8e-06, "loss": 0.4977, "step": 1888 }, { "epoch": 6.5137931034482754, "grad_norm": 1.72111177444458, "learning_rate": 7.804137931034482e-06, "loss": 0.4891, "step": 1889 }, { "epoch": 6.517241379310345, "grad_norm": 4.048909664154053, "learning_rate": 7.808275862068966e-06, "loss": 0.6711, "step": 1890 }, { "epoch": 6.520689655172414, "grad_norm": 0.9310922026634216, "learning_rate": 7.812413793103448e-06, "loss": 0.4802, "step": 1891 }, { "epoch": 6.524137931034483, "grad_norm": 0.8052543997764587, "learning_rate": 7.816551724137931e-06, "loss": 0.4107, "step": 1892 }, { "epoch": 6.527586206896552, "grad_norm": 0.851148247718811, "learning_rate": 7.820689655172413e-06, "loss": 0.4276, "step": 1893 }, { "epoch": 6.531034482758621, "grad_norm": 1.011743426322937, "learning_rate": 7.824827586206897e-06, "loss": 0.4213, "step": 1894 }, { "epoch": 6.5344827586206895, "grad_norm": 0.9687141180038452, "learning_rate": 7.828965517241379e-06, "loss": 0.4241, "step": 1895 }, { "epoch": 6.537931034482758, "grad_norm": 1.0311472415924072, "learning_rate": 7.833103448275863e-06, "loss": 0.4203, "step": 1896 }, { "epoch": 6.541379310344827, "grad_norm": 1.2286514043807983, "learning_rate": 7.837241379310345e-06, "loss": 0.4318, "step": 1897 }, { "epoch": 6.544827586206896, "grad_norm": 1.0774725675582886, "learning_rate": 7.841379310344829e-06, "loss": 0.4493, "step": 1898 }, { "epoch": 6.548275862068966, "grad_norm": 1.1984244585037231, "learning_rate": 7.84551724137931e-06, "loss": 0.3948, "step": 1899 }, { "epoch": 6.551724137931035, "grad_norm": 1.0737570524215698, "learning_rate": 7.849655172413793e-06, "loss": 0.4147, "step": 1900 }, { "epoch": 6.555172413793104, "grad_norm": 3.3427727222442627, "learning_rate": 7.853793103448276e-06, "loss": 0.4823, "step": 1901 }, { "epoch": 6.558620689655172, "grad_norm": 0.9783851504325867, "learning_rate": 7.857931034482758e-06, "loss": 0.4207, "step": 1902 }, { "epoch": 6.562068965517241, "grad_norm": 1.1286373138427734, "learning_rate": 7.862068965517242e-06, "loss": 0.4422, "step": 1903 }, { "epoch": 6.56551724137931, "grad_norm": 1.236361026763916, "learning_rate": 7.866206896551724e-06, "loss": 0.4187, "step": 1904 }, { "epoch": 6.568965517241379, "grad_norm": 1.250390648841858, "learning_rate": 7.870344827586208e-06, "loss": 0.4722, "step": 1905 }, { "epoch": 6.572413793103449, "grad_norm": 1.2168232202529907, "learning_rate": 7.87448275862069e-06, "loss": 0.4145, "step": 1906 }, { "epoch": 6.575862068965518, "grad_norm": 1.1007716655731201, "learning_rate": 7.878620689655173e-06, "loss": 0.4513, "step": 1907 }, { "epoch": 6.5793103448275865, "grad_norm": 1.5002987384796143, "learning_rate": 7.882758620689655e-06, "loss": 0.4282, "step": 1908 }, { "epoch": 6.582758620689655, "grad_norm": 1.357816457748413, "learning_rate": 7.88689655172414e-06, "loss": 0.4511, "step": 1909 }, { "epoch": 6.586206896551724, "grad_norm": 1.4978229999542236, "learning_rate": 7.891034482758621e-06, "loss": 0.4712, "step": 1910 }, { "epoch": 6.589655172413793, "grad_norm": 2.155801296234131, "learning_rate": 7.895172413793105e-06, "loss": 0.4553, "step": 1911 }, { "epoch": 6.593103448275862, "grad_norm": 1.697954535484314, "learning_rate": 7.899310344827585e-06, "loss": 0.4308, "step": 1912 }, { "epoch": 6.596551724137931, "grad_norm": 1.2884629964828491, "learning_rate": 7.903448275862069e-06, "loss": 0.4352, "step": 1913 }, { "epoch": 6.6, "grad_norm": 2.1366684436798096, "learning_rate": 7.907586206896551e-06, "loss": 0.5028, "step": 1914 }, { "epoch": 6.603448275862069, "grad_norm": 2.1035518646240234, "learning_rate": 7.911724137931035e-06, "loss": 0.6375, "step": 1915 }, { "epoch": 6.606896551724138, "grad_norm": 0.8525902628898621, "learning_rate": 7.915862068965517e-06, "loss": 0.503, "step": 1916 }, { "epoch": 6.610344827586207, "grad_norm": 1.0258289575576782, "learning_rate": 7.92e-06, "loss": 0.4172, "step": 1917 }, { "epoch": 6.613793103448276, "grad_norm": 1.020121693611145, "learning_rate": 7.924137931034482e-06, "loss": 0.443, "step": 1918 }, { "epoch": 6.617241379310345, "grad_norm": 1.3062039613723755, "learning_rate": 7.928275862068966e-06, "loss": 0.4378, "step": 1919 }, { "epoch": 6.620689655172414, "grad_norm": 1.3043971061706543, "learning_rate": 7.932413793103448e-06, "loss": 0.46, "step": 1920 }, { "epoch": 6.624137931034483, "grad_norm": 1.0038635730743408, "learning_rate": 7.936551724137932e-06, "loss": 0.4142, "step": 1921 }, { "epoch": 6.627586206896551, "grad_norm": 1.0089762210845947, "learning_rate": 7.940689655172414e-06, "loss": 0.4403, "step": 1922 }, { "epoch": 6.63103448275862, "grad_norm": 0.9384552836418152, "learning_rate": 7.944827586206898e-06, "loss": 0.3711, "step": 1923 }, { "epoch": 6.63448275862069, "grad_norm": 1.216245412826538, "learning_rate": 7.94896551724138e-06, "loss": 0.4618, "step": 1924 }, { "epoch": 6.637931034482759, "grad_norm": 1.4416035413742065, "learning_rate": 7.953103448275862e-06, "loss": 0.4432, "step": 1925 }, { "epoch": 6.641379310344828, "grad_norm": 1.4865615367889404, "learning_rate": 7.957241379310344e-06, "loss": 0.4075, "step": 1926 }, { "epoch": 6.644827586206897, "grad_norm": 1.498138666152954, "learning_rate": 7.961379310344827e-06, "loss": 0.4484, "step": 1927 }, { "epoch": 6.6482758620689655, "grad_norm": 1.380524754524231, "learning_rate": 7.96551724137931e-06, "loss": 0.4239, "step": 1928 }, { "epoch": 6.651724137931034, "grad_norm": 1.128298044204712, "learning_rate": 7.969655172413793e-06, "loss": 0.4648, "step": 1929 }, { "epoch": 6.655172413793103, "grad_norm": 1.0976680517196655, "learning_rate": 7.973793103448275e-06, "loss": 0.4519, "step": 1930 }, { "epoch": 6.658620689655172, "grad_norm": 1.4314086437225342, "learning_rate": 7.977931034482759e-06, "loss": 0.4207, "step": 1931 }, { "epoch": 6.662068965517241, "grad_norm": 4.193882465362549, "learning_rate": 7.982068965517243e-06, "loss": 0.4449, "step": 1932 }, { "epoch": 6.665517241379311, "grad_norm": 1.3431588411331177, "learning_rate": 7.986206896551725e-06, "loss": 0.4492, "step": 1933 }, { "epoch": 6.6689655172413795, "grad_norm": 1.3239859342575073, "learning_rate": 7.990344827586208e-06, "loss": 0.4457, "step": 1934 }, { "epoch": 6.672413793103448, "grad_norm": 1.5766757726669312, "learning_rate": 7.99448275862069e-06, "loss": 0.4351, "step": 1935 }, { "epoch": 6.675862068965517, "grad_norm": 1.291387677192688, "learning_rate": 7.998620689655174e-06, "loss": 0.4752, "step": 1936 }, { "epoch": 6.679310344827586, "grad_norm": 1.9164215326309204, "learning_rate": 8.002758620689656e-06, "loss": 0.4883, "step": 1937 }, { "epoch": 6.682758620689655, "grad_norm": 3.234518051147461, "learning_rate": 8.006896551724138e-06, "loss": 0.451, "step": 1938 }, { "epoch": 6.686206896551724, "grad_norm": 1.6610294580459595, "learning_rate": 8.01103448275862e-06, "loss": 0.5136, "step": 1939 }, { "epoch": 6.689655172413794, "grad_norm": 1.8641180992126465, "learning_rate": 8.015172413793104e-06, "loss": 0.6648, "step": 1940 }, { "epoch": 6.6931034482758625, "grad_norm": 1.1014678478240967, "learning_rate": 8.019310344827586e-06, "loss": 0.4309, "step": 1941 }, { "epoch": 6.696551724137931, "grad_norm": 0.9507203698158264, "learning_rate": 8.02344827586207e-06, "loss": 0.4528, "step": 1942 }, { "epoch": 6.7, "grad_norm": 1.5334193706512451, "learning_rate": 8.027586206896552e-06, "loss": 0.4443, "step": 1943 }, { "epoch": 6.703448275862069, "grad_norm": 0.8097375631332397, "learning_rate": 8.031724137931035e-06, "loss": 0.4473, "step": 1944 }, { "epoch": 6.706896551724138, "grad_norm": 1.2141788005828857, "learning_rate": 8.035862068965517e-06, "loss": 0.4332, "step": 1945 }, { "epoch": 6.710344827586207, "grad_norm": 0.9710454344749451, "learning_rate": 8.040000000000001e-06, "loss": 0.4368, "step": 1946 }, { "epoch": 6.713793103448276, "grad_norm": 0.8481993675231934, "learning_rate": 8.044137931034483e-06, "loss": 0.3943, "step": 1947 }, { "epoch": 6.7172413793103445, "grad_norm": 1.3102105855941772, "learning_rate": 8.048275862068967e-06, "loss": 0.4125, "step": 1948 }, { "epoch": 6.720689655172414, "grad_norm": 3.9923646450042725, "learning_rate": 8.052413793103449e-06, "loss": 0.4237, "step": 1949 }, { "epoch": 6.724137931034483, "grad_norm": 1.192048192024231, "learning_rate": 8.056551724137932e-06, "loss": 0.4053, "step": 1950 }, { "epoch": 6.727586206896552, "grad_norm": 1.1380245685577393, "learning_rate": 8.060689655172413e-06, "loss": 0.4182, "step": 1951 }, { "epoch": 6.731034482758621, "grad_norm": 1.4284569025039673, "learning_rate": 8.064827586206896e-06, "loss": 0.4303, "step": 1952 }, { "epoch": 6.73448275862069, "grad_norm": 1.2512043714523315, "learning_rate": 8.068965517241378e-06, "loss": 0.3974, "step": 1953 }, { "epoch": 6.7379310344827585, "grad_norm": 2.518618106842041, "learning_rate": 8.073103448275862e-06, "loss": 0.4473, "step": 1954 }, { "epoch": 6.741379310344827, "grad_norm": 1.4199706315994263, "learning_rate": 8.077241379310344e-06, "loss": 0.4674, "step": 1955 }, { "epoch": 6.744827586206896, "grad_norm": 2.0034801959991455, "learning_rate": 8.081379310344828e-06, "loss": 0.4413, "step": 1956 }, { "epoch": 6.748275862068965, "grad_norm": 1.0169901847839355, "learning_rate": 8.08551724137931e-06, "loss": 0.4222, "step": 1957 }, { "epoch": 6.751724137931035, "grad_norm": 1.9135476350784302, "learning_rate": 8.089655172413794e-06, "loss": 0.4464, "step": 1958 }, { "epoch": 6.755172413793104, "grad_norm": 1.3966248035430908, "learning_rate": 8.093793103448276e-06, "loss": 0.4334, "step": 1959 }, { "epoch": 6.758620689655173, "grad_norm": 1.3341954946517944, "learning_rate": 8.09793103448276e-06, "loss": 0.4459, "step": 1960 }, { "epoch": 6.7620689655172415, "grad_norm": 1.3784153461456299, "learning_rate": 8.102068965517241e-06, "loss": 0.4318, "step": 1961 }, { "epoch": 6.76551724137931, "grad_norm": 1.6184947490692139, "learning_rate": 8.106206896551725e-06, "loss": 0.4281, "step": 1962 }, { "epoch": 6.768965517241379, "grad_norm": 1.7999746799468994, "learning_rate": 8.110344827586207e-06, "loss": 0.4933, "step": 1963 }, { "epoch": 6.772413793103448, "grad_norm": 1.8514550924301147, "learning_rate": 8.114482758620689e-06, "loss": 0.5023, "step": 1964 }, { "epoch": 6.775862068965517, "grad_norm": 3.4959402084350586, "learning_rate": 8.118620689655171e-06, "loss": 0.7075, "step": 1965 }, { "epoch": 6.779310344827586, "grad_norm": 1.181872844696045, "learning_rate": 8.122758620689655e-06, "loss": 0.4686, "step": 1966 }, { "epoch": 6.7827586206896555, "grad_norm": 1.0596095323562622, "learning_rate": 8.126896551724139e-06, "loss": 0.4549, "step": 1967 }, { "epoch": 6.786206896551724, "grad_norm": 1.4104734659194946, "learning_rate": 8.13103448275862e-06, "loss": 0.4349, "step": 1968 }, { "epoch": 6.789655172413793, "grad_norm": 1.520361304283142, "learning_rate": 8.135172413793104e-06, "loss": 0.4309, "step": 1969 }, { "epoch": 6.793103448275862, "grad_norm": 1.1410067081451416, "learning_rate": 8.139310344827586e-06, "loss": 0.4291, "step": 1970 }, { "epoch": 6.796551724137931, "grad_norm": 1.1880799531936646, "learning_rate": 8.14344827586207e-06, "loss": 0.4213, "step": 1971 }, { "epoch": 6.8, "grad_norm": 1.1257212162017822, "learning_rate": 8.147586206896552e-06, "loss": 0.4486, "step": 1972 }, { "epoch": 6.803448275862069, "grad_norm": 1.0744179487228394, "learning_rate": 8.151724137931036e-06, "loss": 0.4204, "step": 1973 }, { "epoch": 6.8068965517241375, "grad_norm": 1.1275233030319214, "learning_rate": 8.155862068965518e-06, "loss": 0.411, "step": 1974 }, { "epoch": 6.810344827586206, "grad_norm": 0.9959591627120972, "learning_rate": 8.160000000000001e-06, "loss": 0.4076, "step": 1975 }, { "epoch": 6.813793103448276, "grad_norm": 1.172040343284607, "learning_rate": 8.164137931034484e-06, "loss": 0.4198, "step": 1976 }, { "epoch": 6.817241379310345, "grad_norm": 1.0718810558319092, "learning_rate": 8.168275862068966e-06, "loss": 0.4117, "step": 1977 }, { "epoch": 6.820689655172414, "grad_norm": 0.9639748334884644, "learning_rate": 8.172413793103448e-06, "loss": 0.4115, "step": 1978 }, { "epoch": 6.824137931034483, "grad_norm": 1.1335150003433228, "learning_rate": 8.176551724137931e-06, "loss": 0.3933, "step": 1979 }, { "epoch": 6.827586206896552, "grad_norm": 2.5185039043426514, "learning_rate": 8.180689655172413e-06, "loss": 0.4749, "step": 1980 }, { "epoch": 6.8310344827586205, "grad_norm": 1.1950737237930298, "learning_rate": 8.184827586206897e-06, "loss": 0.4247, "step": 1981 }, { "epoch": 6.834482758620689, "grad_norm": 1.5030207633972168, "learning_rate": 8.188965517241379e-06, "loss": 0.4145, "step": 1982 }, { "epoch": 6.837931034482759, "grad_norm": 1.6222866773605347, "learning_rate": 8.193103448275863e-06, "loss": 0.4381, "step": 1983 }, { "epoch": 6.841379310344828, "grad_norm": 2.241894245147705, "learning_rate": 8.197241379310345e-06, "loss": 0.4386, "step": 1984 }, { "epoch": 6.844827586206897, "grad_norm": 1.4052115678787231, "learning_rate": 8.201379310344828e-06, "loss": 0.4391, "step": 1985 }, { "epoch": 6.848275862068966, "grad_norm": 1.4367597103118896, "learning_rate": 8.20551724137931e-06, "loss": 0.4418, "step": 1986 }, { "epoch": 6.8517241379310345, "grad_norm": 1.321714162826538, "learning_rate": 8.209655172413794e-06, "loss": 0.4539, "step": 1987 }, { "epoch": 6.855172413793103, "grad_norm": 1.6121975183486938, "learning_rate": 8.213793103448276e-06, "loss": 0.5632, "step": 1988 }, { "epoch": 6.858620689655172, "grad_norm": 1.6473408937454224, "learning_rate": 8.21793103448276e-06, "loss": 0.5011, "step": 1989 }, { "epoch": 6.862068965517241, "grad_norm": 3.516505241394043, "learning_rate": 8.22206896551724e-06, "loss": 0.6219, "step": 1990 }, { "epoch": 6.86551724137931, "grad_norm": 1.070705533027649, "learning_rate": 8.226206896551724e-06, "loss": 0.4513, "step": 1991 }, { "epoch": 6.86896551724138, "grad_norm": 0.882283627986908, "learning_rate": 8.230344827586206e-06, "loss": 0.4401, "step": 1992 }, { "epoch": 6.872413793103449, "grad_norm": 0.9067946076393127, "learning_rate": 8.23448275862069e-06, "loss": 0.419, "step": 1993 }, { "epoch": 6.875862068965517, "grad_norm": 1.0533273220062256, "learning_rate": 8.238620689655172e-06, "loss": 0.4346, "step": 1994 }, { "epoch": 6.879310344827586, "grad_norm": 0.9759548306465149, "learning_rate": 8.242758620689655e-06, "loss": 0.4072, "step": 1995 }, { "epoch": 6.882758620689655, "grad_norm": 0.9452629685401917, "learning_rate": 8.246896551724137e-06, "loss": 0.4141, "step": 1996 }, { "epoch": 6.886206896551724, "grad_norm": 0.9874908924102783, "learning_rate": 8.251034482758621e-06, "loss": 0.3999, "step": 1997 }, { "epoch": 6.889655172413793, "grad_norm": 1.0018659830093384, "learning_rate": 8.255172413793103e-06, "loss": 0.4157, "step": 1998 }, { "epoch": 6.893103448275862, "grad_norm": 1.0105222463607788, "learning_rate": 8.259310344827587e-06, "loss": 0.4028, "step": 1999 }, { "epoch": 6.896551724137931, "grad_norm": 1.144472599029541, "learning_rate": 8.26344827586207e-06, "loss": 0.4035, "step": 2000 }, { "epoch": 6.896551724137931, "eval_cer": 0.15740096394236966, "eval_loss": 0.4278111457824707, "eval_runtime": 17.4612, "eval_samples_per_second": 53.089, "eval_steps_per_second": 0.172, "eval_wer": 0.39492753623188404, "step": 2000 }, { "epoch": 6.9, "grad_norm": 1.181758165359497, "learning_rate": 8.267586206896553e-06, "loss": 0.4329, "step": 2001 }, { "epoch": 6.903448275862069, "grad_norm": 1.1389281749725342, "learning_rate": 8.271724137931035e-06, "loss": 0.4453, "step": 2002 }, { "epoch": 6.906896551724138, "grad_norm": 1.1490135192871094, "learning_rate": 8.275862068965517e-06, "loss": 0.4157, "step": 2003 }, { "epoch": 6.910344827586207, "grad_norm": 1.512721061706543, "learning_rate": 8.28e-06, "loss": 0.4542, "step": 2004 }, { "epoch": 6.913793103448276, "grad_norm": 1.1070538759231567, "learning_rate": 8.284137931034482e-06, "loss": 0.4325, "step": 2005 }, { "epoch": 6.917241379310345, "grad_norm": 1.139947533607483, "learning_rate": 8.288275862068966e-06, "loss": 0.4457, "step": 2006 }, { "epoch": 6.9206896551724135, "grad_norm": 1.456903338432312, "learning_rate": 8.292413793103448e-06, "loss": 0.4394, "step": 2007 }, { "epoch": 6.924137931034482, "grad_norm": 1.2403497695922852, "learning_rate": 8.296551724137932e-06, "loss": 0.4548, "step": 2008 }, { "epoch": 6.927586206896551, "grad_norm": 3.3906002044677734, "learning_rate": 8.300689655172414e-06, "loss": 0.415, "step": 2009 }, { "epoch": 6.931034482758621, "grad_norm": 1.2797516584396362, "learning_rate": 8.304827586206898e-06, "loss": 0.4245, "step": 2010 }, { "epoch": 6.93448275862069, "grad_norm": 3.0499725341796875, "learning_rate": 8.30896551724138e-06, "loss": 0.4823, "step": 2011 }, { "epoch": 6.937931034482759, "grad_norm": 1.5759822130203247, "learning_rate": 8.313103448275863e-06, "loss": 0.4713, "step": 2012 }, { "epoch": 6.941379310344828, "grad_norm": 1.5487921237945557, "learning_rate": 8.317241379310345e-06, "loss": 0.4593, "step": 2013 }, { "epoch": 6.944827586206896, "grad_norm": 1.672085165977478, "learning_rate": 8.321379310344829e-06, "loss": 0.5081, "step": 2014 }, { "epoch": 6.948275862068965, "grad_norm": 2.0889809131622314, "learning_rate": 8.32551724137931e-06, "loss": 0.5665, "step": 2015 }, { "epoch": 6.951724137931034, "grad_norm": 1.5146253108978271, "learning_rate": 8.329655172413793e-06, "loss": 0.4919, "step": 2016 }, { "epoch": 6.955172413793104, "grad_norm": 0.8766818642616272, "learning_rate": 8.333793103448275e-06, "loss": 0.4352, "step": 2017 }, { "epoch": 6.958620689655173, "grad_norm": 1.4559069871902466, "learning_rate": 8.337931034482759e-06, "loss": 0.4427, "step": 2018 }, { "epoch": 6.962068965517242, "grad_norm": 1.1004458665847778, "learning_rate": 8.34206896551724e-06, "loss": 0.4427, "step": 2019 }, { "epoch": 6.9655172413793105, "grad_norm": 1.0831503868103027, "learning_rate": 8.346206896551724e-06, "loss": 0.4609, "step": 2020 }, { "epoch": 6.968965517241379, "grad_norm": 0.9715977907180786, "learning_rate": 8.350344827586207e-06, "loss": 0.4428, "step": 2021 }, { "epoch": 6.972413793103448, "grad_norm": 1.0220986604690552, "learning_rate": 8.35448275862069e-06, "loss": 0.4136, "step": 2022 }, { "epoch": 6.975862068965517, "grad_norm": 1.0969382524490356, "learning_rate": 8.358620689655172e-06, "loss": 0.4102, "step": 2023 }, { "epoch": 6.979310344827586, "grad_norm": 1.170274019241333, "learning_rate": 8.362758620689656e-06, "loss": 0.4361, "step": 2024 }, { "epoch": 6.982758620689655, "grad_norm": 1.612460970878601, "learning_rate": 8.366896551724138e-06, "loss": 0.4559, "step": 2025 }, { "epoch": 6.9862068965517246, "grad_norm": 1.4657292366027832, "learning_rate": 8.371034482758622e-06, "loss": 0.4802, "step": 2026 }, { "epoch": 6.989655172413793, "grad_norm": 1.538144826889038, "learning_rate": 8.375172413793104e-06, "loss": 0.4281, "step": 2027 }, { "epoch": 6.993103448275862, "grad_norm": 1.5122272968292236, "learning_rate": 8.379310344827586e-06, "loss": 0.4919, "step": 2028 }, { "epoch": 6.996551724137931, "grad_norm": 1.7642943859100342, "learning_rate": 8.383448275862068e-06, "loss": 0.4569, "step": 2029 }, { "epoch": 7.0, "grad_norm": 1.8822975158691406, "learning_rate": 8.387586206896551e-06, "loss": 0.6432, "step": 2030 }, { "epoch": 7.003448275862069, "grad_norm": 1.2204548120498657, "learning_rate": 8.391724137931033e-06, "loss": 0.4586, "step": 2031 }, { "epoch": 7.006896551724138, "grad_norm": 0.982090950012207, "learning_rate": 8.395862068965517e-06, "loss": 0.4118, "step": 2032 }, { "epoch": 7.010344827586207, "grad_norm": 1.1733508110046387, "learning_rate": 8.400000000000001e-06, "loss": 0.4235, "step": 2033 }, { "epoch": 7.0137931034482754, "grad_norm": 1.1994585990905762, "learning_rate": 8.404137931034483e-06, "loss": 0.3944, "step": 2034 }, { "epoch": 7.017241379310345, "grad_norm": 0.8141288757324219, "learning_rate": 8.408275862068967e-06, "loss": 0.4088, "step": 2035 }, { "epoch": 7.020689655172414, "grad_norm": 0.979134202003479, "learning_rate": 8.412413793103449e-06, "loss": 0.4185, "step": 2036 }, { "epoch": 7.024137931034483, "grad_norm": 1.4781606197357178, "learning_rate": 8.416551724137932e-06, "loss": 0.4356, "step": 2037 }, { "epoch": 7.027586206896552, "grad_norm": 1.3877899646759033, "learning_rate": 8.420689655172414e-06, "loss": 0.4226, "step": 2038 }, { "epoch": 7.031034482758621, "grad_norm": 2.773306131362915, "learning_rate": 8.424827586206898e-06, "loss": 0.3993, "step": 2039 }, { "epoch": 7.0344827586206895, "grad_norm": 2.7077205181121826, "learning_rate": 8.42896551724138e-06, "loss": 0.4181, "step": 2040 }, { "epoch": 7.037931034482758, "grad_norm": 1.185450792312622, "learning_rate": 8.433103448275862e-06, "loss": 0.4175, "step": 2041 }, { "epoch": 7.041379310344827, "grad_norm": 0.988084077835083, "learning_rate": 8.437241379310344e-06, "loss": 0.4195, "step": 2042 }, { "epoch": 7.044827586206897, "grad_norm": 1.707456111907959, "learning_rate": 8.441379310344828e-06, "loss": 0.415, "step": 2043 }, { "epoch": 7.048275862068966, "grad_norm": 1.03316068649292, "learning_rate": 8.44551724137931e-06, "loss": 0.3851, "step": 2044 }, { "epoch": 7.051724137931035, "grad_norm": 4.701521873474121, "learning_rate": 8.449655172413794e-06, "loss": 0.463, "step": 2045 }, { "epoch": 7.055172413793104, "grad_norm": 1.5135583877563477, "learning_rate": 8.453793103448276e-06, "loss": 0.4047, "step": 2046 }, { "epoch": 7.058620689655172, "grad_norm": 10.336894989013672, "learning_rate": 8.45793103448276e-06, "loss": 0.3991, "step": 2047 }, { "epoch": 7.062068965517241, "grad_norm": 1.0506373643875122, "learning_rate": 8.462068965517241e-06, "loss": 0.4505, "step": 2048 }, { "epoch": 7.06551724137931, "grad_norm": 1.381252646446228, "learning_rate": 8.466206896551725e-06, "loss": 0.4093, "step": 2049 }, { "epoch": 7.068965517241379, "grad_norm": 1.3582420349121094, "learning_rate": 8.470344827586207e-06, "loss": 0.4027, "step": 2050 }, { "epoch": 7.072413793103448, "grad_norm": 1.8177586793899536, "learning_rate": 8.47448275862069e-06, "loss": 0.4291, "step": 2051 }, { "epoch": 7.075862068965518, "grad_norm": 1.6283975839614868, "learning_rate": 8.478620689655173e-06, "loss": 0.4492, "step": 2052 }, { "epoch": 7.0793103448275865, "grad_norm": 1.3772128820419312, "learning_rate": 8.482758620689656e-06, "loss": 0.4741, "step": 2053 }, { "epoch": 7.082758620689655, "grad_norm": 2.182516098022461, "learning_rate": 8.486896551724137e-06, "loss": 0.501, "step": 2054 }, { "epoch": 7.086206896551724, "grad_norm": 3.7475454807281494, "learning_rate": 8.49103448275862e-06, "loss": 0.622, "step": 2055 }, { "epoch": 7.089655172413793, "grad_norm": 1.6587482690811157, "learning_rate": 8.495172413793103e-06, "loss": 0.4447, "step": 2056 }, { "epoch": 7.093103448275862, "grad_norm": 1.0265586376190186, "learning_rate": 8.499310344827586e-06, "loss": 0.4035, "step": 2057 }, { "epoch": 7.096551724137931, "grad_norm": 0.9017795324325562, "learning_rate": 8.503448275862068e-06, "loss": 0.3884, "step": 2058 }, { "epoch": 7.1, "grad_norm": 0.9561929106712341, "learning_rate": 8.507586206896552e-06, "loss": 0.4024, "step": 2059 }, { "epoch": 7.103448275862069, "grad_norm": 1.7101480960845947, "learning_rate": 8.511724137931034e-06, "loss": 0.3895, "step": 2060 }, { "epoch": 7.106896551724138, "grad_norm": 0.9805511236190796, "learning_rate": 8.515862068965518e-06, "loss": 0.4424, "step": 2061 }, { "epoch": 7.110344827586207, "grad_norm": 1.2657831907272339, "learning_rate": 8.52e-06, "loss": 0.4079, "step": 2062 }, { "epoch": 7.113793103448276, "grad_norm": 1.2215040922164917, "learning_rate": 8.524137931034483e-06, "loss": 0.4245, "step": 2063 }, { "epoch": 7.117241379310345, "grad_norm": 1.1414432525634766, "learning_rate": 8.528275862068965e-06, "loss": 0.4694, "step": 2064 }, { "epoch": 7.120689655172414, "grad_norm": 0.927470862865448, "learning_rate": 8.53241379310345e-06, "loss": 0.3605, "step": 2065 }, { "epoch": 7.124137931034483, "grad_norm": 0.9715412855148315, "learning_rate": 8.536551724137933e-06, "loss": 0.405, "step": 2066 }, { "epoch": 7.127586206896551, "grad_norm": 1.1529747247695923, "learning_rate": 8.540689655172413e-06, "loss": 0.405, "step": 2067 }, { "epoch": 7.13103448275862, "grad_norm": 1.168617606163025, "learning_rate": 8.544827586206897e-06, "loss": 0.4136, "step": 2068 }, { "epoch": 7.13448275862069, "grad_norm": 1.149112343788147, "learning_rate": 8.548965517241379e-06, "loss": 0.4306, "step": 2069 }, { "epoch": 7.137931034482759, "grad_norm": 1.3226076364517212, "learning_rate": 8.553103448275863e-06, "loss": 0.4265, "step": 2070 }, { "epoch": 7.141379310344828, "grad_norm": 1.072123408317566, "learning_rate": 8.557241379310345e-06, "loss": 0.4176, "step": 2071 }, { "epoch": 7.144827586206897, "grad_norm": 1.3373545408248901, "learning_rate": 8.561379310344828e-06, "loss": 0.4409, "step": 2072 }, { "epoch": 7.1482758620689655, "grad_norm": 1.2007538080215454, "learning_rate": 8.56551724137931e-06, "loss": 0.389, "step": 2073 }, { "epoch": 7.151724137931034, "grad_norm": 1.480427861213684, "learning_rate": 8.569655172413794e-06, "loss": 0.4488, "step": 2074 }, { "epoch": 7.155172413793103, "grad_norm": 1.5161381959915161, "learning_rate": 8.573793103448276e-06, "loss": 0.4477, "step": 2075 }, { "epoch": 7.158620689655172, "grad_norm": 1.5106016397476196, "learning_rate": 8.57793103448276e-06, "loss": 0.4173, "step": 2076 }, { "epoch": 7.162068965517241, "grad_norm": 3.5718791484832764, "learning_rate": 8.582068965517242e-06, "loss": 0.4522, "step": 2077 }, { "epoch": 7.165517241379311, "grad_norm": 1.8647154569625854, "learning_rate": 8.586206896551726e-06, "loss": 0.4697, "step": 2078 }, { "epoch": 7.1689655172413795, "grad_norm": 1.5415124893188477, "learning_rate": 8.590344827586208e-06, "loss": 0.4234, "step": 2079 }, { "epoch": 7.172413793103448, "grad_norm": 2.2789342403411865, "learning_rate": 8.59448275862069e-06, "loss": 0.5714, "step": 2080 }, { "epoch": 7.175862068965517, "grad_norm": 0.8857113122940063, "learning_rate": 8.598620689655172e-06, "loss": 0.4999, "step": 2081 }, { "epoch": 7.179310344827586, "grad_norm": 0.7162754535675049, "learning_rate": 8.602758620689655e-06, "loss": 0.4159, "step": 2082 }, { "epoch": 7.182758620689655, "grad_norm": 0.8390347957611084, "learning_rate": 8.606896551724137e-06, "loss": 0.3934, "step": 2083 }, { "epoch": 7.186206896551724, "grad_norm": 0.8842557072639465, "learning_rate": 8.611034482758621e-06, "loss": 0.4121, "step": 2084 }, { "epoch": 7.189655172413793, "grad_norm": 0.8231489062309265, "learning_rate": 8.615172413793103e-06, "loss": 0.3812, "step": 2085 }, { "epoch": 7.1931034482758625, "grad_norm": 0.8310990929603577, "learning_rate": 8.619310344827587e-06, "loss": 0.4005, "step": 2086 }, { "epoch": 7.196551724137931, "grad_norm": 1.1275904178619385, "learning_rate": 8.623448275862069e-06, "loss": 0.4258, "step": 2087 }, { "epoch": 7.2, "grad_norm": 1.0086743831634521, "learning_rate": 8.627586206896553e-06, "loss": 0.3868, "step": 2088 }, { "epoch": 7.203448275862069, "grad_norm": 0.8665248155593872, "learning_rate": 8.631724137931035e-06, "loss": 0.4045, "step": 2089 }, { "epoch": 7.206896551724138, "grad_norm": 0.9042357802391052, "learning_rate": 8.635862068965518e-06, "loss": 0.3922, "step": 2090 }, { "epoch": 7.210344827586207, "grad_norm": 0.9556676149368286, "learning_rate": 8.64e-06, "loss": 0.4219, "step": 2091 }, { "epoch": 7.213793103448276, "grad_norm": 1.4322420358657837, "learning_rate": 8.644137931034482e-06, "loss": 0.4068, "step": 2092 }, { "epoch": 7.2172413793103445, "grad_norm": 0.9926182627677917, "learning_rate": 8.648275862068964e-06, "loss": 0.4008, "step": 2093 }, { "epoch": 7.220689655172414, "grad_norm": 1.1653077602386475, "learning_rate": 8.652413793103448e-06, "loss": 0.3619, "step": 2094 }, { "epoch": 7.224137931034483, "grad_norm": 1.1303009986877441, "learning_rate": 8.65655172413793e-06, "loss": 0.4056, "step": 2095 }, { "epoch": 7.227586206896552, "grad_norm": 1.1988719701766968, "learning_rate": 8.660689655172414e-06, "loss": 0.3873, "step": 2096 }, { "epoch": 7.231034482758621, "grad_norm": 1.8566943407058716, "learning_rate": 8.664827586206896e-06, "loss": 0.4367, "step": 2097 }, { "epoch": 7.23448275862069, "grad_norm": 1.3799281120300293, "learning_rate": 8.66896551724138e-06, "loss": 0.4558, "step": 2098 }, { "epoch": 7.2379310344827585, "grad_norm": 1.0587869882583618, "learning_rate": 8.673103448275863e-06, "loss": 0.4315, "step": 2099 }, { "epoch": 7.241379310344827, "grad_norm": 2.0902671813964844, "learning_rate": 8.677241379310345e-06, "loss": 0.4405, "step": 2100 }, { "epoch": 7.244827586206896, "grad_norm": 2.3534295558929443, "learning_rate": 8.681379310344829e-06, "loss": 0.4566, "step": 2101 }, { "epoch": 7.248275862068965, "grad_norm": 1.4625024795532227, "learning_rate": 8.685517241379311e-06, "loss": 0.4068, "step": 2102 }, { "epoch": 7.251724137931035, "grad_norm": 2.6516683101654053, "learning_rate": 8.689655172413795e-06, "loss": 0.4547, "step": 2103 }, { "epoch": 7.255172413793104, "grad_norm": 1.99648118019104, "learning_rate": 8.693793103448277e-06, "loss": 0.4966, "step": 2104 }, { "epoch": 7.258620689655173, "grad_norm": 3.0477399826049805, "learning_rate": 8.697931034482759e-06, "loss": 0.6522, "step": 2105 }, { "epoch": 7.2620689655172415, "grad_norm": 1.3401633501052856, "learning_rate": 8.70206896551724e-06, "loss": 0.4736, "step": 2106 }, { "epoch": 7.26551724137931, "grad_norm": 0.8731115460395813, "learning_rate": 8.706206896551724e-06, "loss": 0.4196, "step": 2107 }, { "epoch": 7.268965517241379, "grad_norm": 0.973273515701294, "learning_rate": 8.710344827586206e-06, "loss": 0.4519, "step": 2108 }, { "epoch": 7.272413793103448, "grad_norm": 1.0972117185592651, "learning_rate": 8.71448275862069e-06, "loss": 0.4139, "step": 2109 }, { "epoch": 7.275862068965517, "grad_norm": 1.1989047527313232, "learning_rate": 8.718620689655172e-06, "loss": 0.4147, "step": 2110 }, { "epoch": 7.279310344827586, "grad_norm": 0.9615621566772461, "learning_rate": 8.722758620689656e-06, "loss": 0.373, "step": 2111 }, { "epoch": 7.2827586206896555, "grad_norm": 1.2603086233139038, "learning_rate": 8.726896551724138e-06, "loss": 0.4307, "step": 2112 }, { "epoch": 7.286206896551724, "grad_norm": 1.315643072128296, "learning_rate": 8.731034482758622e-06, "loss": 0.5012, "step": 2113 }, { "epoch": 7.289655172413793, "grad_norm": 0.9211488962173462, "learning_rate": 8.735172413793104e-06, "loss": 0.3828, "step": 2114 }, { "epoch": 7.293103448275862, "grad_norm": 1.2741544246673584, "learning_rate": 8.739310344827587e-06, "loss": 0.388, "step": 2115 }, { "epoch": 7.296551724137931, "grad_norm": 1.0222026109695435, "learning_rate": 8.74344827586207e-06, "loss": 0.3785, "step": 2116 }, { "epoch": 7.3, "grad_norm": 1.0942771434783936, "learning_rate": 8.747586206896553e-06, "loss": 0.4176, "step": 2117 }, { "epoch": 7.303448275862069, "grad_norm": 1.2450851202011108, "learning_rate": 8.751724137931033e-06, "loss": 0.4025, "step": 2118 }, { "epoch": 7.3068965517241375, "grad_norm": 1.2166130542755127, "learning_rate": 8.755862068965517e-06, "loss": 0.3882, "step": 2119 }, { "epoch": 7.310344827586207, "grad_norm": 1.1817197799682617, "learning_rate": 8.759999999999999e-06, "loss": 0.4243, "step": 2120 }, { "epoch": 7.313793103448276, "grad_norm": 2.1113057136535645, "learning_rate": 8.764137931034483e-06, "loss": 0.4321, "step": 2121 }, { "epoch": 7.317241379310345, "grad_norm": 1.194806694984436, "learning_rate": 8.768275862068965e-06, "loss": 0.3973, "step": 2122 }, { "epoch": 7.320689655172414, "grad_norm": 1.196812629699707, "learning_rate": 8.772413793103449e-06, "loss": 0.4003, "step": 2123 }, { "epoch": 7.324137931034483, "grad_norm": 1.511155128479004, "learning_rate": 8.77655172413793e-06, "loss": 0.414, "step": 2124 }, { "epoch": 7.327586206896552, "grad_norm": 1.2543838024139404, "learning_rate": 8.780689655172414e-06, "loss": 0.4273, "step": 2125 }, { "epoch": 7.3310344827586205, "grad_norm": 1.678450107574463, "learning_rate": 8.784827586206896e-06, "loss": 0.4322, "step": 2126 }, { "epoch": 7.334482758620689, "grad_norm": 1.4670203924179077, "learning_rate": 8.78896551724138e-06, "loss": 0.4191, "step": 2127 }, { "epoch": 7.337931034482759, "grad_norm": 2.2054452896118164, "learning_rate": 8.793103448275862e-06, "loss": 0.4578, "step": 2128 }, { "epoch": 7.341379310344828, "grad_norm": 3.133143424987793, "learning_rate": 8.797241379310346e-06, "loss": 0.5392, "step": 2129 }, { "epoch": 7.344827586206897, "grad_norm": 1.8931432962417603, "learning_rate": 8.801379310344828e-06, "loss": 0.5769, "step": 2130 }, { "epoch": 7.348275862068966, "grad_norm": 1.538588047027588, "learning_rate": 8.80551724137931e-06, "loss": 0.4529, "step": 2131 }, { "epoch": 7.3517241379310345, "grad_norm": 0.9879276752471924, "learning_rate": 8.809655172413794e-06, "loss": 0.4329, "step": 2132 }, { "epoch": 7.355172413793103, "grad_norm": 1.1127454042434692, "learning_rate": 8.813793103448276e-06, "loss": 0.4195, "step": 2133 }, { "epoch": 7.358620689655172, "grad_norm": 1.2183266878128052, "learning_rate": 8.81793103448276e-06, "loss": 0.4488, "step": 2134 }, { "epoch": 7.362068965517241, "grad_norm": 1.0689938068389893, "learning_rate": 8.822068965517241e-06, "loss": 0.4069, "step": 2135 }, { "epoch": 7.36551724137931, "grad_norm": 1.1136338710784912, "learning_rate": 8.826206896551725e-06, "loss": 0.3723, "step": 2136 }, { "epoch": 7.36896551724138, "grad_norm": 1.3375378847122192, "learning_rate": 8.830344827586207e-06, "loss": 0.3917, "step": 2137 }, { "epoch": 7.372413793103449, "grad_norm": 0.9649126529693604, "learning_rate": 8.83448275862069e-06, "loss": 0.3865, "step": 2138 }, { "epoch": 7.375862068965517, "grad_norm": 0.9150882363319397, "learning_rate": 8.838620689655173e-06, "loss": 0.3892, "step": 2139 }, { "epoch": 7.379310344827586, "grad_norm": 0.9838209748268127, "learning_rate": 8.842758620689656e-06, "loss": 0.3804, "step": 2140 }, { "epoch": 7.382758620689655, "grad_norm": 1.051039695739746, "learning_rate": 8.846896551724138e-06, "loss": 0.3917, "step": 2141 }, { "epoch": 7.386206896551724, "grad_norm": 1.1499810218811035, "learning_rate": 8.851034482758622e-06, "loss": 0.4393, "step": 2142 }, { "epoch": 7.389655172413793, "grad_norm": 1.2298195362091064, "learning_rate": 8.855172413793104e-06, "loss": 0.4209, "step": 2143 }, { "epoch": 7.393103448275862, "grad_norm": 1.0901893377304077, "learning_rate": 8.859310344827586e-06, "loss": 0.4137, "step": 2144 }, { "epoch": 7.396551724137931, "grad_norm": 1.1455057859420776, "learning_rate": 8.863448275862068e-06, "loss": 0.4332, "step": 2145 }, { "epoch": 7.4, "grad_norm": 1.0200468301773071, "learning_rate": 8.867586206896552e-06, "loss": 0.3983, "step": 2146 }, { "epoch": 7.403448275862069, "grad_norm": 1.1251850128173828, "learning_rate": 8.871724137931034e-06, "loss": 0.4445, "step": 2147 }, { "epoch": 7.406896551724138, "grad_norm": 1.2691491842269897, "learning_rate": 8.875862068965518e-06, "loss": 0.4721, "step": 2148 }, { "epoch": 7.410344827586207, "grad_norm": 1.3238576650619507, "learning_rate": 8.88e-06, "loss": 0.4617, "step": 2149 }, { "epoch": 7.413793103448276, "grad_norm": 1.1165536642074585, "learning_rate": 8.884137931034483e-06, "loss": 0.3984, "step": 2150 }, { "epoch": 7.417241379310345, "grad_norm": 3.591275930404663, "learning_rate": 8.888275862068965e-06, "loss": 0.4609, "step": 2151 }, { "epoch": 7.4206896551724135, "grad_norm": 1.4425383806228638, "learning_rate": 8.892413793103449e-06, "loss": 0.479, "step": 2152 }, { "epoch": 7.424137931034482, "grad_norm": 1.5005100965499878, "learning_rate": 8.896551724137931e-06, "loss": 0.4895, "step": 2153 }, { "epoch": 7.427586206896552, "grad_norm": 2.3767216205596924, "learning_rate": 8.900689655172415e-06, "loss": 0.4875, "step": 2154 }, { "epoch": 7.431034482758621, "grad_norm": 3.0583128929138184, "learning_rate": 8.904827586206897e-06, "loss": 0.5562, "step": 2155 }, { "epoch": 7.43448275862069, "grad_norm": 0.8108142018318176, "learning_rate": 8.90896551724138e-06, "loss": 0.4407, "step": 2156 }, { "epoch": 7.437931034482759, "grad_norm": 1.1970654726028442, "learning_rate": 8.913103448275861e-06, "loss": 0.4183, "step": 2157 }, { "epoch": 7.441379310344828, "grad_norm": 0.7658257484436035, "learning_rate": 8.917241379310345e-06, "loss": 0.4131, "step": 2158 }, { "epoch": 7.444827586206896, "grad_norm": 0.9031209945678711, "learning_rate": 8.921379310344827e-06, "loss": 0.3969, "step": 2159 }, { "epoch": 7.448275862068965, "grad_norm": 1.0554771423339844, "learning_rate": 8.92551724137931e-06, "loss": 0.4069, "step": 2160 }, { "epoch": 7.451724137931034, "grad_norm": 0.8328974843025208, "learning_rate": 8.929655172413792e-06, "loss": 0.3804, "step": 2161 }, { "epoch": 7.455172413793104, "grad_norm": 0.8024486899375916, "learning_rate": 8.933793103448276e-06, "loss": 0.4109, "step": 2162 }, { "epoch": 7.458620689655173, "grad_norm": 1.052390217781067, "learning_rate": 8.937931034482758e-06, "loss": 0.4158, "step": 2163 }, { "epoch": 7.462068965517242, "grad_norm": 0.8593242764472961, "learning_rate": 8.942068965517242e-06, "loss": 0.3552, "step": 2164 }, { "epoch": 7.4655172413793105, "grad_norm": 0.9341287016868591, "learning_rate": 8.946206896551726e-06, "loss": 0.394, "step": 2165 }, { "epoch": 7.468965517241379, "grad_norm": 1.0073024034500122, "learning_rate": 8.950344827586208e-06, "loss": 0.4362, "step": 2166 }, { "epoch": 7.472413793103448, "grad_norm": 1.148077130317688, "learning_rate": 8.954482758620691e-06, "loss": 0.3957, "step": 2167 }, { "epoch": 7.475862068965517, "grad_norm": 0.9656093716621399, "learning_rate": 8.958620689655173e-06, "loss": 0.4108, "step": 2168 }, { "epoch": 7.479310344827586, "grad_norm": 1.0590806007385254, "learning_rate": 8.962758620689655e-06, "loss": 0.4303, "step": 2169 }, { "epoch": 7.482758620689655, "grad_norm": 1.4147852659225464, "learning_rate": 8.966896551724137e-06, "loss": 0.4054, "step": 2170 }, { "epoch": 7.4862068965517246, "grad_norm": 1.7751353979110718, "learning_rate": 8.971034482758621e-06, "loss": 0.425, "step": 2171 }, { "epoch": 7.489655172413793, "grad_norm": 1.0491622686386108, "learning_rate": 8.975172413793103e-06, "loss": 0.3924, "step": 2172 }, { "epoch": 7.493103448275862, "grad_norm": 1.0078500509262085, "learning_rate": 8.979310344827587e-06, "loss": 0.4103, "step": 2173 }, { "epoch": 7.496551724137931, "grad_norm": 1.1976892948150635, "learning_rate": 8.983448275862069e-06, "loss": 0.4149, "step": 2174 }, { "epoch": 7.5, "grad_norm": 1.639077067375183, "learning_rate": 8.987586206896552e-06, "loss": 0.4189, "step": 2175 }, { "epoch": 7.503448275862069, "grad_norm": 1.6129969358444214, "learning_rate": 8.991724137931034e-06, "loss": 0.4468, "step": 2176 }, { "epoch": 7.506896551724138, "grad_norm": 1.5629346370697021, "learning_rate": 8.995862068965518e-06, "loss": 0.417, "step": 2177 }, { "epoch": 7.510344827586207, "grad_norm": 2.2098448276519775, "learning_rate": 9e-06, "loss": 0.4832, "step": 2178 }, { "epoch": 7.5137931034482754, "grad_norm": 7.895441055297852, "learning_rate": 9.004137931034484e-06, "loss": 0.5041, "step": 2179 }, { "epoch": 7.517241379310345, "grad_norm": 2.504781723022461, "learning_rate": 9.008275862068966e-06, "loss": 0.603, "step": 2180 }, { "epoch": 7.520689655172414, "grad_norm": 0.8889501690864563, "learning_rate": 9.01241379310345e-06, "loss": 0.4351, "step": 2181 }, { "epoch": 7.524137931034483, "grad_norm": 1.0362904071807861, "learning_rate": 9.01655172413793e-06, "loss": 0.404, "step": 2182 }, { "epoch": 7.527586206896552, "grad_norm": 0.8919817209243774, "learning_rate": 9.020689655172414e-06, "loss": 0.452, "step": 2183 }, { "epoch": 7.531034482758621, "grad_norm": 0.7575556635856628, "learning_rate": 9.024827586206896e-06, "loss": 0.3945, "step": 2184 }, { "epoch": 7.5344827586206895, "grad_norm": 0.9474807977676392, "learning_rate": 9.02896551724138e-06, "loss": 0.423, "step": 2185 }, { "epoch": 7.537931034482758, "grad_norm": 0.9180471897125244, "learning_rate": 9.033103448275861e-06, "loss": 0.444, "step": 2186 }, { "epoch": 7.541379310344827, "grad_norm": 0.9827167391777039, "learning_rate": 9.037241379310345e-06, "loss": 0.3971, "step": 2187 }, { "epoch": 7.544827586206896, "grad_norm": 0.8241952657699585, "learning_rate": 9.041379310344827e-06, "loss": 0.3812, "step": 2188 }, { "epoch": 7.548275862068966, "grad_norm": 1.2925150394439697, "learning_rate": 9.045517241379311e-06, "loss": 0.4089, "step": 2189 }, { "epoch": 7.551724137931035, "grad_norm": 0.9346232414245605, "learning_rate": 9.049655172413793e-06, "loss": 0.3951, "step": 2190 }, { "epoch": 7.555172413793104, "grad_norm": 0.9799219369888306, "learning_rate": 9.053793103448277e-06, "loss": 0.3921, "step": 2191 }, { "epoch": 7.558620689655172, "grad_norm": 1.0860183238983154, "learning_rate": 9.057931034482759e-06, "loss": 0.4136, "step": 2192 }, { "epoch": 7.562068965517241, "grad_norm": 1.9146045446395874, "learning_rate": 9.062068965517242e-06, "loss": 0.3832, "step": 2193 }, { "epoch": 7.56551724137931, "grad_norm": 1.1347551345825195, "learning_rate": 9.066206896551724e-06, "loss": 0.4148, "step": 2194 }, { "epoch": 7.568965517241379, "grad_norm": 2.401001453399658, "learning_rate": 9.070344827586206e-06, "loss": 0.4288, "step": 2195 }, { "epoch": 7.572413793103449, "grad_norm": 1.2191271781921387, "learning_rate": 9.074482758620688e-06, "loss": 0.3955, "step": 2196 }, { "epoch": 7.575862068965518, "grad_norm": 2.623218059539795, "learning_rate": 9.078620689655172e-06, "loss": 0.4166, "step": 2197 }, { "epoch": 7.5793103448275865, "grad_norm": 1.3382072448730469, "learning_rate": 9.082758620689656e-06, "loss": 0.4051, "step": 2198 }, { "epoch": 7.582758620689655, "grad_norm": 1.3074190616607666, "learning_rate": 9.086896551724138e-06, "loss": 0.4314, "step": 2199 }, { "epoch": 7.586206896551724, "grad_norm": 1.5801029205322266, "learning_rate": 9.091034482758622e-06, "loss": 0.4683, "step": 2200 }, { "epoch": 7.589655172413793, "grad_norm": 1.2691867351531982, "learning_rate": 9.095172413793104e-06, "loss": 0.3899, "step": 2201 }, { "epoch": 7.593103448275862, "grad_norm": 1.7643396854400635, "learning_rate": 9.099310344827587e-06, "loss": 0.4565, "step": 2202 }, { "epoch": 7.596551724137931, "grad_norm": 1.4248507022857666, "learning_rate": 9.10344827586207e-06, "loss": 0.4213, "step": 2203 }, { "epoch": 7.6, "grad_norm": 1.6611350774765015, "learning_rate": 9.107586206896553e-06, "loss": 0.4415, "step": 2204 }, { "epoch": 7.603448275862069, "grad_norm": 4.27972936630249, "learning_rate": 9.111724137931035e-06, "loss": 0.6711, "step": 2205 }, { "epoch": 7.606896551724138, "grad_norm": 0.8399089574813843, "learning_rate": 9.115862068965519e-06, "loss": 0.4183, "step": 2206 }, { "epoch": 7.610344827586207, "grad_norm": 0.9432450532913208, "learning_rate": 9.12e-06, "loss": 0.3712, "step": 2207 }, { "epoch": 7.613793103448276, "grad_norm": 0.9144236445426941, "learning_rate": 9.124137931034483e-06, "loss": 0.4215, "step": 2208 }, { "epoch": 7.617241379310345, "grad_norm": 0.8351313471794128, "learning_rate": 9.128275862068965e-06, "loss": 0.3896, "step": 2209 }, { "epoch": 7.620689655172414, "grad_norm": 0.8086634874343872, "learning_rate": 9.132413793103449e-06, "loss": 0.3897, "step": 2210 }, { "epoch": 7.624137931034483, "grad_norm": 1.2182567119598389, "learning_rate": 9.13655172413793e-06, "loss": 0.3954, "step": 2211 }, { "epoch": 7.627586206896551, "grad_norm": 1.1666454076766968, "learning_rate": 9.140689655172414e-06, "loss": 0.4025, "step": 2212 }, { "epoch": 7.63103448275862, "grad_norm": 1.107987880706787, "learning_rate": 9.144827586206896e-06, "loss": 0.3641, "step": 2213 }, { "epoch": 7.63448275862069, "grad_norm": 0.865919828414917, "learning_rate": 9.14896551724138e-06, "loss": 0.3545, "step": 2214 }, { "epoch": 7.637931034482759, "grad_norm": 1.0079078674316406, "learning_rate": 9.153103448275862e-06, "loss": 0.413, "step": 2215 }, { "epoch": 7.641379310344828, "grad_norm": 2.0202853679656982, "learning_rate": 9.157241379310346e-06, "loss": 0.3669, "step": 2216 }, { "epoch": 7.644827586206897, "grad_norm": 1.5625061988830566, "learning_rate": 9.161379310344828e-06, "loss": 0.4067, "step": 2217 }, { "epoch": 7.6482758620689655, "grad_norm": 0.9985549449920654, "learning_rate": 9.165517241379311e-06, "loss": 0.3746, "step": 2218 }, { "epoch": 7.651724137931034, "grad_norm": 1.0633141994476318, "learning_rate": 9.169655172413793e-06, "loss": 0.3808, "step": 2219 }, { "epoch": 7.655172413793103, "grad_norm": 1.3856874704360962, "learning_rate": 9.173793103448277e-06, "loss": 0.4291, "step": 2220 }, { "epoch": 7.658620689655172, "grad_norm": 1.2368457317352295, "learning_rate": 9.177931034482757e-06, "loss": 0.3965, "step": 2221 }, { "epoch": 7.662068965517241, "grad_norm": 1.3958101272583008, "learning_rate": 9.182068965517241e-06, "loss": 0.4043, "step": 2222 }, { "epoch": 7.665517241379311, "grad_norm": 1.6540765762329102, "learning_rate": 9.186206896551723e-06, "loss": 0.403, "step": 2223 }, { "epoch": 7.6689655172413795, "grad_norm": 1.2103148698806763, "learning_rate": 9.190344827586207e-06, "loss": 0.4238, "step": 2224 }, { "epoch": 7.672413793103448, "grad_norm": 1.5143967866897583, "learning_rate": 9.194482758620689e-06, "loss": 0.4265, "step": 2225 }, { "epoch": 7.675862068965517, "grad_norm": 1.526318073272705, "learning_rate": 9.198620689655173e-06, "loss": 0.4389, "step": 2226 }, { "epoch": 7.679310344827586, "grad_norm": 1.485182285308838, "learning_rate": 9.202758620689655e-06, "loss": 0.3752, "step": 2227 }, { "epoch": 7.682758620689655, "grad_norm": 2.3051371574401855, "learning_rate": 9.206896551724138e-06, "loss": 0.4311, "step": 2228 }, { "epoch": 7.686206896551724, "grad_norm": 2.6708731651306152, "learning_rate": 9.21103448275862e-06, "loss": 0.4794, "step": 2229 }, { "epoch": 7.689655172413794, "grad_norm": 5.119457244873047, "learning_rate": 9.215172413793104e-06, "loss": 0.7148, "step": 2230 }, { "epoch": 7.6931034482758625, "grad_norm": 1.1281062364578247, "learning_rate": 9.219310344827586e-06, "loss": 0.5034, "step": 2231 }, { "epoch": 7.696551724137931, "grad_norm": 0.8448970317840576, "learning_rate": 9.22344827586207e-06, "loss": 0.3736, "step": 2232 }, { "epoch": 7.7, "grad_norm": 1.9517457485198975, "learning_rate": 9.227586206896554e-06, "loss": 0.4089, "step": 2233 }, { "epoch": 7.703448275862069, "grad_norm": 0.9104092121124268, "learning_rate": 9.231724137931034e-06, "loss": 0.4153, "step": 2234 }, { "epoch": 7.706896551724138, "grad_norm": 0.9994204640388489, "learning_rate": 9.235862068965518e-06, "loss": 0.3708, "step": 2235 }, { "epoch": 7.710344827586207, "grad_norm": 0.8654398918151855, "learning_rate": 9.24e-06, "loss": 0.3726, "step": 2236 }, { "epoch": 7.713793103448276, "grad_norm": 1.3976999521255493, "learning_rate": 9.244137931034483e-06, "loss": 0.4234, "step": 2237 }, { "epoch": 7.7172413793103445, "grad_norm": 1.3625798225402832, "learning_rate": 9.248275862068965e-06, "loss": 0.4046, "step": 2238 }, { "epoch": 7.720689655172414, "grad_norm": 1.9385970830917358, "learning_rate": 9.252413793103449e-06, "loss": 0.3833, "step": 2239 }, { "epoch": 7.724137931034483, "grad_norm": 1.2018134593963623, "learning_rate": 9.256551724137931e-06, "loss": 0.3761, "step": 2240 }, { "epoch": 7.727586206896552, "grad_norm": 0.9981000423431396, "learning_rate": 9.260689655172415e-06, "loss": 0.433, "step": 2241 }, { "epoch": 7.731034482758621, "grad_norm": 1.3945374488830566, "learning_rate": 9.264827586206897e-06, "loss": 0.4202, "step": 2242 }, { "epoch": 7.73448275862069, "grad_norm": 2.072640895843506, "learning_rate": 9.26896551724138e-06, "loss": 0.3611, "step": 2243 }, { "epoch": 7.7379310344827585, "grad_norm": 1.136539101600647, "learning_rate": 9.273103448275863e-06, "loss": 0.3997, "step": 2244 }, { "epoch": 7.741379310344827, "grad_norm": 1.0544764995574951, "learning_rate": 9.277241379310346e-06, "loss": 0.3881, "step": 2245 }, { "epoch": 7.744827586206896, "grad_norm": 1.1523357629776, "learning_rate": 9.281379310344828e-06, "loss": 0.4062, "step": 2246 }, { "epoch": 7.748275862068965, "grad_norm": 1.6596592664718628, "learning_rate": 9.28551724137931e-06, "loss": 0.4067, "step": 2247 }, { "epoch": 7.751724137931035, "grad_norm": 1.3579983711242676, "learning_rate": 9.289655172413792e-06, "loss": 0.3888, "step": 2248 }, { "epoch": 7.755172413793104, "grad_norm": 1.177811622619629, "learning_rate": 9.293793103448276e-06, "loss": 0.372, "step": 2249 }, { "epoch": 7.758620689655173, "grad_norm": 1.4510847330093384, "learning_rate": 9.297931034482758e-06, "loss": 0.423, "step": 2250 }, { "epoch": 7.7620689655172415, "grad_norm": 1.2967690229415894, "learning_rate": 9.302068965517242e-06, "loss": 0.4492, "step": 2251 }, { "epoch": 7.76551724137931, "grad_norm": 1.998538851737976, "learning_rate": 9.306206896551724e-06, "loss": 0.4516, "step": 2252 }, { "epoch": 7.768965517241379, "grad_norm": 1.3166166543960571, "learning_rate": 9.310344827586207e-06, "loss": 0.445, "step": 2253 }, { "epoch": 7.772413793103448, "grad_norm": 1.7035328149795532, "learning_rate": 9.31448275862069e-06, "loss": 0.4757, "step": 2254 }, { "epoch": 7.775862068965517, "grad_norm": 2.154865264892578, "learning_rate": 9.318620689655173e-06, "loss": 0.6091, "step": 2255 }, { "epoch": 7.779310344827586, "grad_norm": 0.9483218789100647, "learning_rate": 9.322758620689655e-06, "loss": 0.4538, "step": 2256 }, { "epoch": 7.7827586206896555, "grad_norm": 1.0719192028045654, "learning_rate": 9.326896551724139e-06, "loss": 0.4334, "step": 2257 }, { "epoch": 7.786206896551724, "grad_norm": 0.8214827179908752, "learning_rate": 9.331034482758621e-06, "loss": 0.3799, "step": 2258 }, { "epoch": 7.789655172413793, "grad_norm": 0.8887372612953186, "learning_rate": 9.335172413793103e-06, "loss": 0.3944, "step": 2259 }, { "epoch": 7.793103448275862, "grad_norm": 0.944793164730072, "learning_rate": 9.339310344827585e-06, "loss": 0.443, "step": 2260 }, { "epoch": 7.796551724137931, "grad_norm": 1.0334010124206543, "learning_rate": 9.343448275862069e-06, "loss": 0.3828, "step": 2261 }, { "epoch": 7.8, "grad_norm": 1.1502193212509155, "learning_rate": 9.34758620689655e-06, "loss": 0.4288, "step": 2262 }, { "epoch": 7.803448275862069, "grad_norm": 1.1628177165985107, "learning_rate": 9.351724137931034e-06, "loss": 0.4636, "step": 2263 }, { "epoch": 7.8068965517241375, "grad_norm": 0.8443792462348938, "learning_rate": 9.355862068965516e-06, "loss": 0.3959, "step": 2264 }, { "epoch": 7.810344827586206, "grad_norm": 1.0659632682800293, "learning_rate": 9.36e-06, "loss": 0.4239, "step": 2265 }, { "epoch": 7.813793103448276, "grad_norm": 1.9573338031768799, "learning_rate": 9.364137931034484e-06, "loss": 0.4107, "step": 2266 }, { "epoch": 7.817241379310345, "grad_norm": 0.9826457500457764, "learning_rate": 9.368275862068966e-06, "loss": 0.3992, "step": 2267 }, { "epoch": 7.820689655172414, "grad_norm": 1.0651394128799438, "learning_rate": 9.37241379310345e-06, "loss": 0.3801, "step": 2268 }, { "epoch": 7.824137931034483, "grad_norm": 1.601267695426941, "learning_rate": 9.376551724137932e-06, "loss": 0.4072, "step": 2269 }, { "epoch": 7.827586206896552, "grad_norm": 1.0895260572433472, "learning_rate": 9.380689655172415e-06, "loss": 0.3972, "step": 2270 }, { "epoch": 7.8310344827586205, "grad_norm": 1.1008331775665283, "learning_rate": 9.384827586206897e-06, "loss": 0.3676, "step": 2271 }, { "epoch": 7.834482758620689, "grad_norm": 1.0804451704025269, "learning_rate": 9.38896551724138e-06, "loss": 0.3952, "step": 2272 }, { "epoch": 7.837931034482759, "grad_norm": 1.1710691452026367, "learning_rate": 9.393103448275861e-06, "loss": 0.3873, "step": 2273 }, { "epoch": 7.841379310344828, "grad_norm": 1.3710957765579224, "learning_rate": 9.397241379310345e-06, "loss": 0.4029, "step": 2274 }, { "epoch": 7.844827586206897, "grad_norm": 1.5435378551483154, "learning_rate": 9.401379310344827e-06, "loss": 0.4447, "step": 2275 }, { "epoch": 7.848275862068966, "grad_norm": 1.4787694215774536, "learning_rate": 9.40551724137931e-06, "loss": 0.4713, "step": 2276 }, { "epoch": 7.8517241379310345, "grad_norm": 2.0912368297576904, "learning_rate": 9.409655172413793e-06, "loss": 0.3948, "step": 2277 }, { "epoch": 7.855172413793103, "grad_norm": 1.682320475578308, "learning_rate": 9.413793103448277e-06, "loss": 0.4336, "step": 2278 }, { "epoch": 7.858620689655172, "grad_norm": 1.8233436346054077, "learning_rate": 9.417931034482759e-06, "loss": 0.4735, "step": 2279 }, { "epoch": 7.862068965517241, "grad_norm": 2.445082426071167, "learning_rate": 9.422068965517242e-06, "loss": 0.5951, "step": 2280 }, { "epoch": 7.86551724137931, "grad_norm": 0.9558900594711304, "learning_rate": 9.426206896551724e-06, "loss": 0.4314, "step": 2281 }, { "epoch": 7.86896551724138, "grad_norm": 0.7824614644050598, "learning_rate": 9.430344827586208e-06, "loss": 0.3965, "step": 2282 }, { "epoch": 7.872413793103449, "grad_norm": 1.1691139936447144, "learning_rate": 9.43448275862069e-06, "loss": 0.3846, "step": 2283 }, { "epoch": 7.875862068965517, "grad_norm": 1.0063579082489014, "learning_rate": 9.438620689655174e-06, "loss": 0.4209, "step": 2284 }, { "epoch": 7.879310344827586, "grad_norm": 1.0607926845550537, "learning_rate": 9.442758620689654e-06, "loss": 0.3761, "step": 2285 }, { "epoch": 7.882758620689655, "grad_norm": 1.2688567638397217, "learning_rate": 9.446896551724138e-06, "loss": 0.3567, "step": 2286 }, { "epoch": 7.886206896551724, "grad_norm": 1.106147050857544, "learning_rate": 9.45103448275862e-06, "loss": 0.3927, "step": 2287 }, { "epoch": 7.889655172413793, "grad_norm": 0.9745606184005737, "learning_rate": 9.455172413793104e-06, "loss": 0.3836, "step": 2288 }, { "epoch": 7.893103448275862, "grad_norm": 2.625335454940796, "learning_rate": 9.459310344827586e-06, "loss": 0.3607, "step": 2289 }, { "epoch": 7.896551724137931, "grad_norm": 1.2243562936782837, "learning_rate": 9.46344827586207e-06, "loss": 0.3973, "step": 2290 }, { "epoch": 7.9, "grad_norm": 1.2493386268615723, "learning_rate": 9.467586206896551e-06, "loss": 0.3973, "step": 2291 }, { "epoch": 7.903448275862069, "grad_norm": 1.122357726097107, "learning_rate": 9.471724137931035e-06, "loss": 0.4128, "step": 2292 }, { "epoch": 7.906896551724138, "grad_norm": 1.4031096696853638, "learning_rate": 9.475862068965517e-06, "loss": 0.3905, "step": 2293 }, { "epoch": 7.910344827586207, "grad_norm": 1.8607025146484375, "learning_rate": 9.48e-06, "loss": 0.4363, "step": 2294 }, { "epoch": 7.913793103448276, "grad_norm": 1.372784972190857, "learning_rate": 9.484137931034483e-06, "loss": 0.4094, "step": 2295 }, { "epoch": 7.917241379310345, "grad_norm": 3.2091331481933594, "learning_rate": 9.488275862068966e-06, "loss": 0.3798, "step": 2296 }, { "epoch": 7.9206896551724135, "grad_norm": 1.132907748222351, "learning_rate": 9.492413793103448e-06, "loss": 0.3875, "step": 2297 }, { "epoch": 7.924137931034482, "grad_norm": 2.078660011291504, "learning_rate": 9.49655172413793e-06, "loss": 0.4227, "step": 2298 }, { "epoch": 7.927586206896551, "grad_norm": 1.220297932624817, "learning_rate": 9.500689655172414e-06, "loss": 0.3916, "step": 2299 }, { "epoch": 7.931034482758621, "grad_norm": 1.593618392944336, "learning_rate": 9.504827586206896e-06, "loss": 0.4338, "step": 2300 }, { "epoch": 7.93448275862069, "grad_norm": 1.2740709781646729, "learning_rate": 9.50896551724138e-06, "loss": 0.4308, "step": 2301 }, { "epoch": 7.937931034482759, "grad_norm": 2.173532485961914, "learning_rate": 9.513103448275862e-06, "loss": 0.4095, "step": 2302 }, { "epoch": 7.941379310344828, "grad_norm": 1.4108402729034424, "learning_rate": 9.517241379310346e-06, "loss": 0.4098, "step": 2303 }, { "epoch": 7.944827586206896, "grad_norm": 1.7546855211257935, "learning_rate": 9.521379310344828e-06, "loss": 0.5019, "step": 2304 }, { "epoch": 7.948275862068965, "grad_norm": 4.7618255615234375, "learning_rate": 9.525517241379311e-06, "loss": 0.6145, "step": 2305 }, { "epoch": 7.951724137931034, "grad_norm": 1.1544150114059448, "learning_rate": 9.529655172413793e-06, "loss": 0.4249, "step": 2306 }, { "epoch": 7.955172413793104, "grad_norm": 0.8919119238853455, "learning_rate": 9.533793103448277e-06, "loss": 0.44, "step": 2307 }, { "epoch": 7.958620689655173, "grad_norm": 0.9048497080802917, "learning_rate": 9.537931034482759e-06, "loss": 0.39, "step": 2308 }, { "epoch": 7.962068965517242, "grad_norm": 0.9528817534446716, "learning_rate": 9.542068965517243e-06, "loss": 0.3972, "step": 2309 }, { "epoch": 7.9655172413793105, "grad_norm": 0.9042252898216248, "learning_rate": 9.546206896551725e-06, "loss": 0.3525, "step": 2310 }, { "epoch": 7.968965517241379, "grad_norm": 0.8594781756401062, "learning_rate": 9.550344827586207e-06, "loss": 0.3677, "step": 2311 }, { "epoch": 7.972413793103448, "grad_norm": 0.9882687926292419, "learning_rate": 9.554482758620689e-06, "loss": 0.417, "step": 2312 }, { "epoch": 7.975862068965517, "grad_norm": 0.8970781564712524, "learning_rate": 9.558620689655173e-06, "loss": 0.382, "step": 2313 }, { "epoch": 7.979310344827586, "grad_norm": 1.5629407167434692, "learning_rate": 9.562758620689655e-06, "loss": 0.4234, "step": 2314 }, { "epoch": 7.982758620689655, "grad_norm": 1.3795979022979736, "learning_rate": 9.566896551724138e-06, "loss": 0.3951, "step": 2315 }, { "epoch": 7.9862068965517246, "grad_norm": 2.3763585090637207, "learning_rate": 9.57103448275862e-06, "loss": 0.4486, "step": 2316 }, { "epoch": 7.989655172413793, "grad_norm": 1.5003228187561035, "learning_rate": 9.575172413793104e-06, "loss": 0.3814, "step": 2317 }, { "epoch": 7.993103448275862, "grad_norm": 1.222848892211914, "learning_rate": 9.579310344827586e-06, "loss": 0.4233, "step": 2318 }, { "epoch": 7.996551724137931, "grad_norm": 1.4563320875167847, "learning_rate": 9.58344827586207e-06, "loss": 0.4981, "step": 2319 }, { "epoch": 8.0, "grad_norm": 1.9630495309829712, "learning_rate": 9.587586206896552e-06, "loss": 0.5659, "step": 2320 }, { "epoch": 8.00344827586207, "grad_norm": 0.8871464729309082, "learning_rate": 9.591724137931036e-06, "loss": 0.4723, "step": 2321 }, { "epoch": 8.006896551724138, "grad_norm": 0.9386507868766785, "learning_rate": 9.595862068965518e-06, "loss": 0.3757, "step": 2322 }, { "epoch": 8.010344827586207, "grad_norm": 0.9456931352615356, "learning_rate": 9.600000000000001e-06, "loss": 0.4203, "step": 2323 }, { "epoch": 8.013793103448275, "grad_norm": 0.8678559064865112, "learning_rate": 9.604137931034482e-06, "loss": 0.3616, "step": 2324 }, { "epoch": 8.017241379310345, "grad_norm": 0.9947382211685181, "learning_rate": 9.608275862068965e-06, "loss": 0.3676, "step": 2325 }, { "epoch": 8.020689655172413, "grad_norm": 1.1524215936660767, "learning_rate": 9.612413793103447e-06, "loss": 0.3822, "step": 2326 }, { "epoch": 8.024137931034483, "grad_norm": 1.1106339693069458, "learning_rate": 9.616551724137931e-06, "loss": 0.3917, "step": 2327 }, { "epoch": 8.027586206896551, "grad_norm": 1.0629539489746094, "learning_rate": 9.620689655172413e-06, "loss": 0.3772, "step": 2328 }, { "epoch": 8.03103448275862, "grad_norm": 0.9557615518569946, "learning_rate": 9.624827586206897e-06, "loss": 0.3691, "step": 2329 }, { "epoch": 8.03448275862069, "grad_norm": 0.8600583672523499, "learning_rate": 9.628965517241379e-06, "loss": 0.3641, "step": 2330 }, { "epoch": 8.037931034482758, "grad_norm": 1.141033411026001, "learning_rate": 9.633103448275862e-06, "loss": 0.3423, "step": 2331 }, { "epoch": 8.041379310344828, "grad_norm": 1.0340917110443115, "learning_rate": 9.637241379310346e-06, "loss": 0.4209, "step": 2332 }, { "epoch": 8.044827586206896, "grad_norm": 1.0051631927490234, "learning_rate": 9.641379310344828e-06, "loss": 0.3655, "step": 2333 }, { "epoch": 8.048275862068966, "grad_norm": 1.055050015449524, "learning_rate": 9.645517241379312e-06, "loss": 0.4175, "step": 2334 }, { "epoch": 8.051724137931034, "grad_norm": 1.0937201976776123, "learning_rate": 9.649655172413794e-06, "loss": 0.3891, "step": 2335 }, { "epoch": 8.055172413793104, "grad_norm": 1.1318057775497437, "learning_rate": 9.653793103448278e-06, "loss": 0.3644, "step": 2336 }, { "epoch": 8.058620689655172, "grad_norm": 1.0038537979125977, "learning_rate": 9.657931034482758e-06, "loss": 0.3467, "step": 2337 }, { "epoch": 8.062068965517241, "grad_norm": 1.0698168277740479, "learning_rate": 9.662068965517242e-06, "loss": 0.3991, "step": 2338 }, { "epoch": 8.065517241379311, "grad_norm": 1.4067537784576416, "learning_rate": 9.666206896551724e-06, "loss": 0.4535, "step": 2339 }, { "epoch": 8.068965517241379, "grad_norm": 1.1627627611160278, "learning_rate": 9.670344827586207e-06, "loss": 0.4093, "step": 2340 }, { "epoch": 8.072413793103449, "grad_norm": 1.220166802406311, "learning_rate": 9.67448275862069e-06, "loss": 0.393, "step": 2341 }, { "epoch": 8.075862068965517, "grad_norm": 1.706939458847046, "learning_rate": 9.678620689655173e-06, "loss": 0.4076, "step": 2342 }, { "epoch": 8.079310344827586, "grad_norm": 1.4200917482376099, "learning_rate": 9.682758620689655e-06, "loss": 0.4475, "step": 2343 }, { "epoch": 8.082758620689654, "grad_norm": 1.8430383205413818, "learning_rate": 9.686896551724139e-06, "loss": 0.4448, "step": 2344 }, { "epoch": 8.086206896551724, "grad_norm": 3.2273147106170654, "learning_rate": 9.691034482758621e-06, "loss": 0.536, "step": 2345 }, { "epoch": 8.089655172413794, "grad_norm": 1.04346764087677, "learning_rate": 9.695172413793105e-06, "loss": 0.386, "step": 2346 }, { "epoch": 8.093103448275862, "grad_norm": 0.8139097094535828, "learning_rate": 9.699310344827587e-06, "loss": 0.3843, "step": 2347 }, { "epoch": 8.096551724137932, "grad_norm": 0.9576451778411865, "learning_rate": 9.70344827586207e-06, "loss": 0.4231, "step": 2348 }, { "epoch": 8.1, "grad_norm": 1.015474557876587, "learning_rate": 9.707586206896552e-06, "loss": 0.3995, "step": 2349 }, { "epoch": 8.10344827586207, "grad_norm": 0.9303961992263794, "learning_rate": 9.711724137931034e-06, "loss": 0.4024, "step": 2350 }, { "epoch": 8.106896551724137, "grad_norm": 0.979558527469635, "learning_rate": 9.715862068965516e-06, "loss": 0.3688, "step": 2351 }, { "epoch": 8.110344827586207, "grad_norm": 0.9767018556594849, "learning_rate": 9.72e-06, "loss": 0.3688, "step": 2352 }, { "epoch": 8.113793103448275, "grad_norm": 0.8363773226737976, "learning_rate": 9.724137931034482e-06, "loss": 0.3948, "step": 2353 }, { "epoch": 8.117241379310345, "grad_norm": 1.2726835012435913, "learning_rate": 9.728275862068966e-06, "loss": 0.3985, "step": 2354 }, { "epoch": 8.120689655172415, "grad_norm": 1.1630443334579468, "learning_rate": 9.732413793103448e-06, "loss": 0.4099, "step": 2355 }, { "epoch": 8.124137931034483, "grad_norm": 0.9729044437408447, "learning_rate": 9.736551724137932e-06, "loss": 0.3744, "step": 2356 }, { "epoch": 8.127586206896552, "grad_norm": 0.8423942923545837, "learning_rate": 9.740689655172414e-06, "loss": 0.3771, "step": 2357 }, { "epoch": 8.13103448275862, "grad_norm": 1.1001287698745728, "learning_rate": 9.744827586206897e-06, "loss": 0.3949, "step": 2358 }, { "epoch": 8.13448275862069, "grad_norm": 1.052502155303955, "learning_rate": 9.74896551724138e-06, "loss": 0.3963, "step": 2359 }, { "epoch": 8.137931034482758, "grad_norm": 0.9554281234741211, "learning_rate": 9.753103448275863e-06, "loss": 0.3977, "step": 2360 }, { "epoch": 8.141379310344828, "grad_norm": 1.1926219463348389, "learning_rate": 9.757241379310345e-06, "loss": 0.3792, "step": 2361 }, { "epoch": 8.144827586206896, "grad_norm": 1.21049964427948, "learning_rate": 9.761379310344827e-06, "loss": 0.3269, "step": 2362 }, { "epoch": 8.148275862068965, "grad_norm": 1.2935538291931152, "learning_rate": 9.765517241379309e-06, "loss": 0.3699, "step": 2363 }, { "epoch": 8.151724137931035, "grad_norm": 1.2166334390640259, "learning_rate": 9.769655172413793e-06, "loss": 0.399, "step": 2364 }, { "epoch": 8.155172413793103, "grad_norm": 1.6384574174880981, "learning_rate": 9.773793103448277e-06, "loss": 0.3845, "step": 2365 }, { "epoch": 8.158620689655173, "grad_norm": 1.2748346328735352, "learning_rate": 9.777931034482759e-06, "loss": 0.4363, "step": 2366 }, { "epoch": 8.162068965517241, "grad_norm": 2.299952507019043, "learning_rate": 9.782068965517242e-06, "loss": 0.3988, "step": 2367 }, { "epoch": 8.16551724137931, "grad_norm": 2.6963605880737305, "learning_rate": 9.786206896551724e-06, "loss": 0.4643, "step": 2368 }, { "epoch": 8.168965517241379, "grad_norm": 1.7631193399429321, "learning_rate": 9.790344827586208e-06, "loss": 0.4927, "step": 2369 }, { "epoch": 8.172413793103448, "grad_norm": 3.418078660964966, "learning_rate": 9.79448275862069e-06, "loss": 0.5483, "step": 2370 }, { "epoch": 8.175862068965516, "grad_norm": 0.7430515289306641, "learning_rate": 9.798620689655174e-06, "loss": 0.4421, "step": 2371 }, { "epoch": 8.179310344827586, "grad_norm": 0.8675023317337036, "learning_rate": 9.802758620689656e-06, "loss": 0.4236, "step": 2372 }, { "epoch": 8.182758620689656, "grad_norm": 0.6856626272201538, "learning_rate": 9.80689655172414e-06, "loss": 0.3734, "step": 2373 }, { "epoch": 8.186206896551724, "grad_norm": 0.7695475816726685, "learning_rate": 9.811034482758621e-06, "loss": 0.4304, "step": 2374 }, { "epoch": 8.189655172413794, "grad_norm": 0.8779867887496948, "learning_rate": 9.815172413793103e-06, "loss": 0.418, "step": 2375 }, { "epoch": 8.193103448275862, "grad_norm": 0.9100344777107239, "learning_rate": 9.819310344827585e-06, "loss": 0.3651, "step": 2376 }, { "epoch": 8.196551724137931, "grad_norm": 0.9314269423484802, "learning_rate": 9.82344827586207e-06, "loss": 0.4032, "step": 2377 }, { "epoch": 8.2, "grad_norm": 0.8682388067245483, "learning_rate": 9.827586206896551e-06, "loss": 0.4053, "step": 2378 }, { "epoch": 8.203448275862069, "grad_norm": 0.8203408718109131, "learning_rate": 9.831724137931035e-06, "loss": 0.3526, "step": 2379 }, { "epoch": 8.206896551724139, "grad_norm": 0.8921285271644592, "learning_rate": 9.835862068965517e-06, "loss": 0.3602, "step": 2380 }, { "epoch": 8.210344827586207, "grad_norm": 1.7972371578216553, "learning_rate": 9.84e-06, "loss": 0.3808, "step": 2381 }, { "epoch": 8.213793103448277, "grad_norm": 0.9632447957992554, "learning_rate": 9.844137931034483e-06, "loss": 0.366, "step": 2382 }, { "epoch": 8.217241379310344, "grad_norm": 1.9392080307006836, "learning_rate": 9.848275862068966e-06, "loss": 0.4071, "step": 2383 }, { "epoch": 8.220689655172414, "grad_norm": 1.0980864763259888, "learning_rate": 9.852413793103448e-06, "loss": 0.3901, "step": 2384 }, { "epoch": 8.224137931034482, "grad_norm": 1.1395412683486938, "learning_rate": 9.856551724137932e-06, "loss": 0.4099, "step": 2385 }, { "epoch": 8.227586206896552, "grad_norm": 1.3230575323104858, "learning_rate": 9.860689655172414e-06, "loss": 0.4233, "step": 2386 }, { "epoch": 8.23103448275862, "grad_norm": 1.0059036016464233, "learning_rate": 9.864827586206898e-06, "loss": 0.3944, "step": 2387 }, { "epoch": 8.23448275862069, "grad_norm": 1.0326162576675415, "learning_rate": 9.868965517241378e-06, "loss": 0.4303, "step": 2388 }, { "epoch": 8.23793103448276, "grad_norm": 2.650991916656494, "learning_rate": 9.873103448275862e-06, "loss": 0.4072, "step": 2389 }, { "epoch": 8.241379310344827, "grad_norm": 1.114986777305603, "learning_rate": 9.877241379310344e-06, "loss": 0.3919, "step": 2390 }, { "epoch": 8.244827586206897, "grad_norm": 2.125778913497925, "learning_rate": 9.881379310344828e-06, "loss": 0.4497, "step": 2391 }, { "epoch": 8.248275862068965, "grad_norm": 1.3734065294265747, "learning_rate": 9.88551724137931e-06, "loss": 0.3907, "step": 2392 }, { "epoch": 8.251724137931035, "grad_norm": 1.3142105340957642, "learning_rate": 9.889655172413793e-06, "loss": 0.4068, "step": 2393 }, { "epoch": 8.255172413793103, "grad_norm": 1.9965190887451172, "learning_rate": 9.893793103448275e-06, "loss": 0.4258, "step": 2394 }, { "epoch": 8.258620689655173, "grad_norm": 9.063660621643066, "learning_rate": 9.897931034482759e-06, "loss": 0.6367, "step": 2395 }, { "epoch": 8.26206896551724, "grad_norm": 1.2180730104446411, "learning_rate": 9.902068965517241e-06, "loss": 0.4531, "step": 2396 }, { "epoch": 8.26551724137931, "grad_norm": 1.0046309232711792, "learning_rate": 9.906206896551725e-06, "loss": 0.4096, "step": 2397 }, { "epoch": 8.26896551724138, "grad_norm": 0.7971204519271851, "learning_rate": 9.910344827586209e-06, "loss": 0.3799, "step": 2398 }, { "epoch": 8.272413793103448, "grad_norm": 0.9133461713790894, "learning_rate": 9.91448275862069e-06, "loss": 0.3737, "step": 2399 }, { "epoch": 8.275862068965518, "grad_norm": 0.799087405204773, "learning_rate": 9.918620689655174e-06, "loss": 0.3972, "step": 2400 }, { "epoch": 8.279310344827586, "grad_norm": 1.256496548652649, "learning_rate": 9.922758620689655e-06, "loss": 0.3933, "step": 2401 }, { "epoch": 8.282758620689656, "grad_norm": 0.8601134419441223, "learning_rate": 9.926896551724138e-06, "loss": 0.3713, "step": 2402 }, { "epoch": 8.286206896551723, "grad_norm": 0.9649903178215027, "learning_rate": 9.93103448275862e-06, "loss": 0.4105, "step": 2403 }, { "epoch": 8.289655172413793, "grad_norm": 0.9236730933189392, "learning_rate": 9.935172413793104e-06, "loss": 0.3985, "step": 2404 }, { "epoch": 8.293103448275861, "grad_norm": 1.0197901725769043, "learning_rate": 9.939310344827586e-06, "loss": 0.3822, "step": 2405 }, { "epoch": 8.296551724137931, "grad_norm": 1.0560780763626099, "learning_rate": 9.94344827586207e-06, "loss": 0.3662, "step": 2406 }, { "epoch": 8.3, "grad_norm": 0.9277490377426147, "learning_rate": 9.947586206896552e-06, "loss": 0.3865, "step": 2407 }, { "epoch": 8.303448275862069, "grad_norm": 2.2847888469696045, "learning_rate": 9.951724137931035e-06, "loss": 0.3707, "step": 2408 }, { "epoch": 8.306896551724138, "grad_norm": 1.1124602556228638, "learning_rate": 9.955862068965517e-06, "loss": 0.3532, "step": 2409 }, { "epoch": 8.310344827586206, "grad_norm": 1.018133521080017, "learning_rate": 9.960000000000001e-06, "loss": 0.3893, "step": 2410 }, { "epoch": 8.313793103448276, "grad_norm": 0.8776049613952637, "learning_rate": 9.964137931034483e-06, "loss": 0.3666, "step": 2411 }, { "epoch": 8.317241379310344, "grad_norm": 1.7728999853134155, "learning_rate": 9.968275862068967e-06, "loss": 0.3947, "step": 2412 }, { "epoch": 8.320689655172414, "grad_norm": 1.136776328086853, "learning_rate": 9.972413793103449e-06, "loss": 0.3598, "step": 2413 }, { "epoch": 8.324137931034482, "grad_norm": 1.3076255321502686, "learning_rate": 9.976551724137931e-06, "loss": 0.3737, "step": 2414 }, { "epoch": 8.327586206896552, "grad_norm": 1.1578031778335571, "learning_rate": 9.980689655172413e-06, "loss": 0.4375, "step": 2415 }, { "epoch": 8.331034482758621, "grad_norm": 1.1163114309310913, "learning_rate": 9.984827586206897e-06, "loss": 0.4483, "step": 2416 }, { "epoch": 8.33448275862069, "grad_norm": 1.2357248067855835, "learning_rate": 9.988965517241379e-06, "loss": 0.3694, "step": 2417 }, { "epoch": 8.337931034482759, "grad_norm": 2.393838405609131, "learning_rate": 9.993103448275862e-06, "loss": 0.4135, "step": 2418 }, { "epoch": 8.341379310344827, "grad_norm": 2.3140742778778076, "learning_rate": 9.997241379310344e-06, "loss": 0.4605, "step": 2419 }, { "epoch": 8.344827586206897, "grad_norm": 3.4510385990142822, "learning_rate": 1.0001379310344828e-05, "loss": 0.6145, "step": 2420 }, { "epoch": 8.348275862068965, "grad_norm": 0.8150730729103088, "learning_rate": 1.000551724137931e-05, "loss": 0.4242, "step": 2421 }, { "epoch": 8.351724137931035, "grad_norm": 0.7454983592033386, "learning_rate": 1.0009655172413794e-05, "loss": 0.3968, "step": 2422 }, { "epoch": 8.355172413793104, "grad_norm": 0.819017231464386, "learning_rate": 1.0013793103448276e-05, "loss": 0.395, "step": 2423 }, { "epoch": 8.358620689655172, "grad_norm": 0.7235642671585083, "learning_rate": 1.001793103448276e-05, "loss": 0.3992, "step": 2424 }, { "epoch": 8.362068965517242, "grad_norm": 0.9616053700447083, "learning_rate": 1.0022068965517242e-05, "loss": 0.3967, "step": 2425 }, { "epoch": 8.36551724137931, "grad_norm": 1.2271820306777954, "learning_rate": 1.0026206896551725e-05, "loss": 0.3868, "step": 2426 }, { "epoch": 8.36896551724138, "grad_norm": 0.8559068441390991, "learning_rate": 1.0030344827586206e-05, "loss": 0.3943, "step": 2427 }, { "epoch": 8.372413793103448, "grad_norm": 1.1781460046768188, "learning_rate": 1.003448275862069e-05, "loss": 0.379, "step": 2428 }, { "epoch": 8.375862068965517, "grad_norm": 0.7901871204376221, "learning_rate": 1.0038620689655171e-05, "loss": 0.3654, "step": 2429 }, { "epoch": 8.379310344827585, "grad_norm": 1.0421156883239746, "learning_rate": 1.0042758620689655e-05, "loss": 0.3656, "step": 2430 }, { "epoch": 8.382758620689655, "grad_norm": 1.048080325126648, "learning_rate": 1.0046896551724139e-05, "loss": 0.4135, "step": 2431 }, { "epoch": 8.386206896551725, "grad_norm": 1.1624823808670044, "learning_rate": 1.005103448275862e-05, "loss": 0.3793, "step": 2432 }, { "epoch": 8.389655172413793, "grad_norm": 1.5312831401824951, "learning_rate": 1.0055172413793105e-05, "loss": 0.379, "step": 2433 }, { "epoch": 8.393103448275863, "grad_norm": 1.2038164138793945, "learning_rate": 1.0059310344827587e-05, "loss": 0.3716, "step": 2434 }, { "epoch": 8.39655172413793, "grad_norm": 1.0132368803024292, "learning_rate": 1.006344827586207e-05, "loss": 0.3754, "step": 2435 }, { "epoch": 8.4, "grad_norm": 1.4230060577392578, "learning_rate": 1.0067586206896552e-05, "loss": 0.4187, "step": 2436 }, { "epoch": 8.403448275862068, "grad_norm": 1.6486294269561768, "learning_rate": 1.0071724137931036e-05, "loss": 0.3911, "step": 2437 }, { "epoch": 8.406896551724138, "grad_norm": 1.1634994745254517, "learning_rate": 1.0075862068965518e-05, "loss": 0.4091, "step": 2438 }, { "epoch": 8.410344827586206, "grad_norm": 1.387668251991272, "learning_rate": 1.008e-05, "loss": 0.3909, "step": 2439 }, { "epoch": 8.413793103448276, "grad_norm": 1.4630275964736938, "learning_rate": 1.0084137931034482e-05, "loss": 0.4637, "step": 2440 }, { "epoch": 8.417241379310346, "grad_norm": 1.5220102071762085, "learning_rate": 1.0088275862068966e-05, "loss": 0.4322, "step": 2441 }, { "epoch": 8.420689655172414, "grad_norm": 1.2301534414291382, "learning_rate": 1.0092413793103448e-05, "loss": 0.3816, "step": 2442 }, { "epoch": 8.424137931034483, "grad_norm": 2.381126880645752, "learning_rate": 1.0096551724137932e-05, "loss": 0.4342, "step": 2443 }, { "epoch": 8.427586206896551, "grad_norm": 1.4352307319641113, "learning_rate": 1.0100689655172414e-05, "loss": 0.4593, "step": 2444 }, { "epoch": 8.431034482758621, "grad_norm": 2.3325064182281494, "learning_rate": 1.0104827586206897e-05, "loss": 0.531, "step": 2445 }, { "epoch": 8.434482758620689, "grad_norm": 0.658314049243927, "learning_rate": 1.010896551724138e-05, "loss": 0.3948, "step": 2446 }, { "epoch": 8.437931034482759, "grad_norm": 0.6702313423156738, "learning_rate": 1.0113103448275863e-05, "loss": 0.3717, "step": 2447 }, { "epoch": 8.441379310344828, "grad_norm": 0.9105868935585022, "learning_rate": 1.0117241379310345e-05, "loss": 0.3972, "step": 2448 }, { "epoch": 8.444827586206896, "grad_norm": 0.7636206150054932, "learning_rate": 1.0121379310344829e-05, "loss": 0.3431, "step": 2449 }, { "epoch": 8.448275862068966, "grad_norm": 0.7782055139541626, "learning_rate": 1.012551724137931e-05, "loss": 0.3949, "step": 2450 }, { "epoch": 8.451724137931034, "grad_norm": 0.7945952415466309, "learning_rate": 1.0129655172413794e-05, "loss": 0.3838, "step": 2451 }, { "epoch": 8.455172413793104, "grad_norm": 0.8510268330574036, "learning_rate": 1.0133793103448275e-05, "loss": 0.3826, "step": 2452 }, { "epoch": 8.458620689655172, "grad_norm": 1.026050329208374, "learning_rate": 1.0137931034482758e-05, "loss": 0.3975, "step": 2453 }, { "epoch": 8.462068965517242, "grad_norm": 0.821258008480072, "learning_rate": 1.014206896551724e-05, "loss": 0.3694, "step": 2454 }, { "epoch": 8.46551724137931, "grad_norm": 1.0681475400924683, "learning_rate": 1.0146206896551724e-05, "loss": 0.3837, "step": 2455 }, { "epoch": 8.46896551724138, "grad_norm": 0.9036635756492615, "learning_rate": 1.0150344827586206e-05, "loss": 0.362, "step": 2456 }, { "epoch": 8.472413793103449, "grad_norm": 1.0273518562316895, "learning_rate": 1.015448275862069e-05, "loss": 0.3926, "step": 2457 }, { "epoch": 8.475862068965517, "grad_norm": 1.0159246921539307, "learning_rate": 1.0158620689655172e-05, "loss": 0.368, "step": 2458 }, { "epoch": 8.479310344827587, "grad_norm": 1.2569267749786377, "learning_rate": 1.0162758620689656e-05, "loss": 0.369, "step": 2459 }, { "epoch": 8.482758620689655, "grad_norm": 1.0417625904083252, "learning_rate": 1.0166896551724138e-05, "loss": 0.3439, "step": 2460 }, { "epoch": 8.486206896551725, "grad_norm": 1.031754970550537, "learning_rate": 1.0171034482758621e-05, "loss": 0.372, "step": 2461 }, { "epoch": 8.489655172413793, "grad_norm": 1.0797312259674072, "learning_rate": 1.0175172413793103e-05, "loss": 0.3952, "step": 2462 }, { "epoch": 8.493103448275862, "grad_norm": 1.2008087635040283, "learning_rate": 1.0179310344827587e-05, "loss": 0.4188, "step": 2463 }, { "epoch": 8.49655172413793, "grad_norm": 1.3286844491958618, "learning_rate": 1.0183448275862069e-05, "loss": 0.3812, "step": 2464 }, { "epoch": 8.5, "grad_norm": 2.117759943008423, "learning_rate": 1.0187586206896551e-05, "loss": 0.35, "step": 2465 }, { "epoch": 8.50344827586207, "grad_norm": 1.2638177871704102, "learning_rate": 1.0191724137931035e-05, "loss": 0.4053, "step": 2466 }, { "epoch": 8.506896551724138, "grad_norm": 1.4343847036361694, "learning_rate": 1.0195862068965517e-05, "loss": 0.3841, "step": 2467 }, { "epoch": 8.510344827586207, "grad_norm": 1.355989694595337, "learning_rate": 1.02e-05, "loss": 0.4385, "step": 2468 }, { "epoch": 8.513793103448275, "grad_norm": 2.0521364212036133, "learning_rate": 1.0204137931034483e-05, "loss": 0.4979, "step": 2469 }, { "epoch": 8.517241379310345, "grad_norm": 3.38191819190979, "learning_rate": 1.0208275862068966e-05, "loss": 0.5804, "step": 2470 }, { "epoch": 8.520689655172413, "grad_norm": 0.8719080090522766, "learning_rate": 1.0212413793103448e-05, "loss": 0.4053, "step": 2471 }, { "epoch": 8.524137931034483, "grad_norm": 0.7600401639938354, "learning_rate": 1.0216551724137932e-05, "loss": 0.3836, "step": 2472 }, { "epoch": 8.527586206896551, "grad_norm": 0.7414761185646057, "learning_rate": 1.0220689655172414e-05, "loss": 0.3838, "step": 2473 }, { "epoch": 8.53103448275862, "grad_norm": 0.8838641047477722, "learning_rate": 1.0224827586206898e-05, "loss": 0.4007, "step": 2474 }, { "epoch": 8.53448275862069, "grad_norm": 0.8959022164344788, "learning_rate": 1.022896551724138e-05, "loss": 0.3785, "step": 2475 }, { "epoch": 8.537931034482758, "grad_norm": 1.013388991355896, "learning_rate": 1.0233103448275864e-05, "loss": 0.3884, "step": 2476 }, { "epoch": 8.541379310344828, "grad_norm": 0.8243466019630432, "learning_rate": 1.0237241379310346e-05, "loss": 0.3753, "step": 2477 }, { "epoch": 8.544827586206896, "grad_norm": 1.1414860486984253, "learning_rate": 1.0241379310344828e-05, "loss": 0.3626, "step": 2478 }, { "epoch": 8.548275862068966, "grad_norm": 0.8648579120635986, "learning_rate": 1.024551724137931e-05, "loss": 0.3616, "step": 2479 }, { "epoch": 8.551724137931034, "grad_norm": 1.069296956062317, "learning_rate": 1.0249655172413793e-05, "loss": 0.3892, "step": 2480 }, { "epoch": 8.555172413793104, "grad_norm": 0.8833296895027161, "learning_rate": 1.0253793103448275e-05, "loss": 0.4309, "step": 2481 }, { "epoch": 8.558620689655172, "grad_norm": 1.0005985498428345, "learning_rate": 1.0257931034482759e-05, "loss": 0.3416, "step": 2482 }, { "epoch": 8.562068965517241, "grad_norm": 1.9344159364700317, "learning_rate": 1.0262068965517241e-05, "loss": 0.3775, "step": 2483 }, { "epoch": 8.565517241379311, "grad_norm": 1.1410139799118042, "learning_rate": 1.0266206896551725e-05, "loss": 0.3491, "step": 2484 }, { "epoch": 8.568965517241379, "grad_norm": 1.440813422203064, "learning_rate": 1.0270344827586207e-05, "loss": 0.3907, "step": 2485 }, { "epoch": 8.572413793103449, "grad_norm": 1.0882221460342407, "learning_rate": 1.027448275862069e-05, "loss": 0.3693, "step": 2486 }, { "epoch": 8.575862068965517, "grad_norm": 1.547493815422058, "learning_rate": 1.0278620689655172e-05, "loss": 0.4272, "step": 2487 }, { "epoch": 8.579310344827586, "grad_norm": 1.3091845512390137, "learning_rate": 1.0282758620689656e-05, "loss": 0.3744, "step": 2488 }, { "epoch": 8.582758620689654, "grad_norm": 1.4817161560058594, "learning_rate": 1.0286896551724138e-05, "loss": 0.4001, "step": 2489 }, { "epoch": 8.586206896551724, "grad_norm": 1.1359484195709229, "learning_rate": 1.0291034482758622e-05, "loss": 0.3786, "step": 2490 }, { "epoch": 8.589655172413792, "grad_norm": 1.7325903177261353, "learning_rate": 1.0295172413793102e-05, "loss": 0.4234, "step": 2491 }, { "epoch": 8.593103448275862, "grad_norm": 1.3769828081130981, "learning_rate": 1.0299310344827586e-05, "loss": 0.3877, "step": 2492 }, { "epoch": 8.596551724137932, "grad_norm": 3.662612199783325, "learning_rate": 1.0303448275862068e-05, "loss": 0.4346, "step": 2493 }, { "epoch": 8.6, "grad_norm": 2.305130958557129, "learning_rate": 1.0307586206896552e-05, "loss": 0.5149, "step": 2494 }, { "epoch": 8.60344827586207, "grad_norm": 3.243431806564331, "learning_rate": 1.0311724137931034e-05, "loss": 0.6109, "step": 2495 }, { "epoch": 8.606896551724137, "grad_norm": 1.431944727897644, "learning_rate": 1.0315862068965517e-05, "loss": 0.4491, "step": 2496 }, { "epoch": 8.610344827586207, "grad_norm": 0.8196541666984558, "learning_rate": 1.032e-05, "loss": 0.3512, "step": 2497 }, { "epoch": 8.613793103448275, "grad_norm": 0.8996418714523315, "learning_rate": 1.0324137931034483e-05, "loss": 0.4041, "step": 2498 }, { "epoch": 8.617241379310345, "grad_norm": 0.9493209719657898, "learning_rate": 1.0328275862068967e-05, "loss": 0.3824, "step": 2499 }, { "epoch": 8.620689655172415, "grad_norm": 0.7592256665229797, "learning_rate": 1.0332413793103449e-05, "loss": 0.3773, "step": 2500 }, { "epoch": 8.624137931034483, "grad_norm": 0.8956820964813232, "learning_rate": 1.0336551724137933e-05, "loss": 0.357, "step": 2501 }, { "epoch": 8.627586206896552, "grad_norm": 1.225331425666809, "learning_rate": 1.0340689655172415e-05, "loss": 0.4077, "step": 2502 }, { "epoch": 8.63103448275862, "grad_norm": 0.9252066016197205, "learning_rate": 1.0344827586206898e-05, "loss": 0.3888, "step": 2503 }, { "epoch": 8.63448275862069, "grad_norm": 1.1819201707839966, "learning_rate": 1.0348965517241379e-05, "loss": 0.375, "step": 2504 }, { "epoch": 8.637931034482758, "grad_norm": 1.3423759937286377, "learning_rate": 1.0353103448275862e-05, "loss": 0.3416, "step": 2505 }, { "epoch": 8.641379310344828, "grad_norm": 1.7768049240112305, "learning_rate": 1.0357241379310344e-05, "loss": 0.3685, "step": 2506 }, { "epoch": 8.644827586206896, "grad_norm": 2.4356372356414795, "learning_rate": 1.0361379310344828e-05, "loss": 0.3789, "step": 2507 }, { "epoch": 8.648275862068965, "grad_norm": 1.014826774597168, "learning_rate": 1.036551724137931e-05, "loss": 0.3734, "step": 2508 }, { "epoch": 8.651724137931035, "grad_norm": 0.9532105326652527, "learning_rate": 1.0369655172413794e-05, "loss": 0.3843, "step": 2509 }, { "epoch": 8.655172413793103, "grad_norm": 1.0884021520614624, "learning_rate": 1.0373793103448276e-05, "loss": 0.4215, "step": 2510 }, { "epoch": 8.658620689655173, "grad_norm": 1.2218250036239624, "learning_rate": 1.037793103448276e-05, "loss": 0.3705, "step": 2511 }, { "epoch": 8.662068965517241, "grad_norm": 2.440671682357788, "learning_rate": 1.0382068965517242e-05, "loss": 0.4046, "step": 2512 }, { "epoch": 8.66551724137931, "grad_norm": 2.1299116611480713, "learning_rate": 1.0386206896551725e-05, "loss": 0.3687, "step": 2513 }, { "epoch": 8.668965517241379, "grad_norm": 1.1005237102508545, "learning_rate": 1.0390344827586207e-05, "loss": 0.3949, "step": 2514 }, { "epoch": 8.672413793103448, "grad_norm": 1.1653485298156738, "learning_rate": 1.0394482758620691e-05, "loss": 0.3795, "step": 2515 }, { "epoch": 8.675862068965518, "grad_norm": 1.0649363994598389, "learning_rate": 1.0398620689655173e-05, "loss": 0.3703, "step": 2516 }, { "epoch": 8.679310344827586, "grad_norm": 1.9073071479797363, "learning_rate": 1.0402758620689655e-05, "loss": 0.4208, "step": 2517 }, { "epoch": 8.682758620689656, "grad_norm": 2.7248551845550537, "learning_rate": 1.0406896551724137e-05, "loss": 0.3769, "step": 2518 }, { "epoch": 8.686206896551724, "grad_norm": 2.2425827980041504, "learning_rate": 1.041103448275862e-05, "loss": 0.4575, "step": 2519 }, { "epoch": 8.689655172413794, "grad_norm": 1.9089826345443726, "learning_rate": 1.0415172413793103e-05, "loss": 0.5626, "step": 2520 }, { "epoch": 8.693103448275862, "grad_norm": 0.8941256999969482, "learning_rate": 1.0419310344827587e-05, "loss": 0.4494, "step": 2521 }, { "epoch": 8.696551724137931, "grad_norm": 0.7974642515182495, "learning_rate": 1.0423448275862069e-05, "loss": 0.4114, "step": 2522 }, { "epoch": 8.7, "grad_norm": 0.7768343091011047, "learning_rate": 1.0427586206896552e-05, "loss": 0.3497, "step": 2523 }, { "epoch": 8.703448275862069, "grad_norm": 0.8609229922294617, "learning_rate": 1.0431724137931034e-05, "loss": 0.3977, "step": 2524 }, { "epoch": 8.706896551724139, "grad_norm": 1.2646762132644653, "learning_rate": 1.0435862068965518e-05, "loss": 0.3622, "step": 2525 }, { "epoch": 8.710344827586207, "grad_norm": 0.9895530939102173, "learning_rate": 1.044e-05, "loss": 0.3261, "step": 2526 }, { "epoch": 8.713793103448277, "grad_norm": 0.8866108059883118, "learning_rate": 1.0444137931034484e-05, "loss": 0.3625, "step": 2527 }, { "epoch": 8.717241379310344, "grad_norm": 3.34957218170166, "learning_rate": 1.0448275862068966e-05, "loss": 0.3848, "step": 2528 }, { "epoch": 8.720689655172414, "grad_norm": 0.9011849761009216, "learning_rate": 1.0452413793103448e-05, "loss": 0.3772, "step": 2529 }, { "epoch": 8.724137931034482, "grad_norm": 1.4339579343795776, "learning_rate": 1.045655172413793e-05, "loss": 0.3923, "step": 2530 }, { "epoch": 8.727586206896552, "grad_norm": 1.3489489555358887, "learning_rate": 1.0460689655172413e-05, "loss": 0.3589, "step": 2531 }, { "epoch": 8.73103448275862, "grad_norm": 1.8905000686645508, "learning_rate": 1.0464827586206897e-05, "loss": 0.3695, "step": 2532 }, { "epoch": 8.73448275862069, "grad_norm": 1.1253653764724731, "learning_rate": 1.046896551724138e-05, "loss": 0.3917, "step": 2533 }, { "epoch": 8.73793103448276, "grad_norm": 1.3284363746643066, "learning_rate": 1.0473103448275863e-05, "loss": 0.4123, "step": 2534 }, { "epoch": 8.741379310344827, "grad_norm": 1.1570411920547485, "learning_rate": 1.0477241379310345e-05, "loss": 0.4187, "step": 2535 }, { "epoch": 8.744827586206897, "grad_norm": 1.6909993886947632, "learning_rate": 1.0481379310344829e-05, "loss": 0.3655, "step": 2536 }, { "epoch": 8.748275862068965, "grad_norm": 1.1679692268371582, "learning_rate": 1.048551724137931e-05, "loss": 0.3552, "step": 2537 }, { "epoch": 8.751724137931035, "grad_norm": 1.2461036443710327, "learning_rate": 1.0489655172413794e-05, "loss": 0.3929, "step": 2538 }, { "epoch": 8.755172413793103, "grad_norm": 2.757154941558838, "learning_rate": 1.0493793103448276e-05, "loss": 0.3694, "step": 2539 }, { "epoch": 8.758620689655173, "grad_norm": 1.4535998106002808, "learning_rate": 1.049793103448276e-05, "loss": 0.3698, "step": 2540 }, { "epoch": 8.76206896551724, "grad_norm": 2.018662691116333, "learning_rate": 1.0502068965517242e-05, "loss": 0.412, "step": 2541 }, { "epoch": 8.76551724137931, "grad_norm": 2.754152297973633, "learning_rate": 1.0506206896551724e-05, "loss": 0.4761, "step": 2542 }, { "epoch": 8.76896551724138, "grad_norm": 4.000030040740967, "learning_rate": 1.0510344827586206e-05, "loss": 0.42, "step": 2543 }, { "epoch": 8.772413793103448, "grad_norm": 1.8818734884262085, "learning_rate": 1.051448275862069e-05, "loss": 0.4332, "step": 2544 }, { "epoch": 8.775862068965518, "grad_norm": 2.3319902420043945, "learning_rate": 1.0518620689655172e-05, "loss": 0.5917, "step": 2545 }, { "epoch": 8.779310344827586, "grad_norm": 0.8576189279556274, "learning_rate": 1.0522758620689656e-05, "loss": 0.4099, "step": 2546 }, { "epoch": 8.782758620689656, "grad_norm": 0.8340697288513184, "learning_rate": 1.0526896551724138e-05, "loss": 0.3847, "step": 2547 }, { "epoch": 8.786206896551723, "grad_norm": 0.7577791213989258, "learning_rate": 1.0531034482758621e-05, "loss": 0.4162, "step": 2548 }, { "epoch": 8.789655172413793, "grad_norm": 0.7888219952583313, "learning_rate": 1.0535172413793103e-05, "loss": 0.342, "step": 2549 }, { "epoch": 8.793103448275861, "grad_norm": 1.0022138357162476, "learning_rate": 1.0539310344827587e-05, "loss": 0.3838, "step": 2550 }, { "epoch": 8.796551724137931, "grad_norm": 0.8438299894332886, "learning_rate": 1.0543448275862069e-05, "loss": 0.3599, "step": 2551 }, { "epoch": 8.8, "grad_norm": 0.857320249080658, "learning_rate": 1.0547586206896553e-05, "loss": 0.4003, "step": 2552 }, { "epoch": 8.803448275862069, "grad_norm": 1.036697506904602, "learning_rate": 1.0551724137931035e-05, "loss": 0.3699, "step": 2553 }, { "epoch": 8.806896551724138, "grad_norm": 1.045536756515503, "learning_rate": 1.0555862068965519e-05, "loss": 0.351, "step": 2554 }, { "epoch": 8.810344827586206, "grad_norm": 3.7477588653564453, "learning_rate": 1.0559999999999999e-05, "loss": 0.3898, "step": 2555 }, { "epoch": 8.813793103448276, "grad_norm": 0.9782183170318604, "learning_rate": 1.0564137931034483e-05, "loss": 0.3613, "step": 2556 }, { "epoch": 8.817241379310344, "grad_norm": 1.6955149173736572, "learning_rate": 1.0568275862068965e-05, "loss": 0.367, "step": 2557 }, { "epoch": 8.820689655172414, "grad_norm": 1.037423849105835, "learning_rate": 1.0572413793103448e-05, "loss": 0.3823, "step": 2558 }, { "epoch": 8.824137931034482, "grad_norm": 1.1420235633850098, "learning_rate": 1.057655172413793e-05, "loss": 0.3786, "step": 2559 }, { "epoch": 8.827586206896552, "grad_norm": 1.3203436136245728, "learning_rate": 1.0580689655172414e-05, "loss": 0.3836, "step": 2560 }, { "epoch": 8.831034482758621, "grad_norm": 1.1813002824783325, "learning_rate": 1.0584827586206896e-05, "loss": 0.3762, "step": 2561 }, { "epoch": 8.83448275862069, "grad_norm": 1.166910171508789, "learning_rate": 1.058896551724138e-05, "loss": 0.4, "step": 2562 }, { "epoch": 8.837931034482759, "grad_norm": 1.0238449573516846, "learning_rate": 1.0593103448275862e-05, "loss": 0.3901, "step": 2563 }, { "epoch": 8.841379310344827, "grad_norm": 1.1172372102737427, "learning_rate": 1.0597241379310345e-05, "loss": 0.3742, "step": 2564 }, { "epoch": 8.844827586206897, "grad_norm": 1.223456859588623, "learning_rate": 1.060137931034483e-05, "loss": 0.3545, "step": 2565 }, { "epoch": 8.848275862068965, "grad_norm": 1.6168657541275024, "learning_rate": 1.0605517241379311e-05, "loss": 0.3946, "step": 2566 }, { "epoch": 8.851724137931035, "grad_norm": 3.259182929992676, "learning_rate": 1.0609655172413795e-05, "loss": 0.3608, "step": 2567 }, { "epoch": 8.855172413793104, "grad_norm": 2.729572296142578, "learning_rate": 1.0613793103448275e-05, "loss": 0.4221, "step": 2568 }, { "epoch": 8.858620689655172, "grad_norm": 2.0708963871002197, "learning_rate": 1.0617931034482759e-05, "loss": 0.4613, "step": 2569 }, { "epoch": 8.862068965517242, "grad_norm": 8.344003677368164, "learning_rate": 1.0622068965517241e-05, "loss": 0.6178, "step": 2570 }, { "epoch": 8.86551724137931, "grad_norm": 1.671143889427185, "learning_rate": 1.0626206896551725e-05, "loss": 0.4228, "step": 2571 }, { "epoch": 8.86896551724138, "grad_norm": 1.2414354085922241, "learning_rate": 1.0630344827586207e-05, "loss": 0.4015, "step": 2572 }, { "epoch": 8.872413793103448, "grad_norm": 0.9782024621963501, "learning_rate": 1.063448275862069e-05, "loss": 0.37, "step": 2573 }, { "epoch": 8.875862068965517, "grad_norm": 0.7351034283638, "learning_rate": 1.0638620689655172e-05, "loss": 0.3553, "step": 2574 }, { "epoch": 8.879310344827585, "grad_norm": 1.0810050964355469, "learning_rate": 1.0642758620689656e-05, "loss": 0.3704, "step": 2575 }, { "epoch": 8.882758620689655, "grad_norm": 0.7878817915916443, "learning_rate": 1.0646896551724138e-05, "loss": 0.3439, "step": 2576 }, { "epoch": 8.886206896551725, "grad_norm": 0.8647119998931885, "learning_rate": 1.0651034482758622e-05, "loss": 0.3668, "step": 2577 }, { "epoch": 8.889655172413793, "grad_norm": 1.0891149044036865, "learning_rate": 1.0655172413793104e-05, "loss": 0.3686, "step": 2578 }, { "epoch": 8.893103448275863, "grad_norm": 1.0213607549667358, "learning_rate": 1.0659310344827588e-05, "loss": 0.3696, "step": 2579 }, { "epoch": 8.89655172413793, "grad_norm": 1.229444146156311, "learning_rate": 1.066344827586207e-05, "loss": 0.3663, "step": 2580 }, { "epoch": 8.9, "grad_norm": 0.9911984205245972, "learning_rate": 1.0667586206896552e-05, "loss": 0.3886, "step": 2581 }, { "epoch": 8.903448275862068, "grad_norm": 1.2000603675842285, "learning_rate": 1.0671724137931034e-05, "loss": 0.413, "step": 2582 }, { "epoch": 8.906896551724138, "grad_norm": 1.096451997756958, "learning_rate": 1.0675862068965517e-05, "loss": 0.3563, "step": 2583 }, { "epoch": 8.910344827586208, "grad_norm": 1.1097477674484253, "learning_rate": 1.068e-05, "loss": 0.3783, "step": 2584 }, { "epoch": 8.913793103448276, "grad_norm": 1.070035696029663, "learning_rate": 1.0684137931034483e-05, "loss": 0.3754, "step": 2585 }, { "epoch": 8.917241379310346, "grad_norm": 1.079339861869812, "learning_rate": 1.0688275862068965e-05, "loss": 0.3749, "step": 2586 }, { "epoch": 8.920689655172414, "grad_norm": 1.57889986038208, "learning_rate": 1.0692413793103449e-05, "loss": 0.3415, "step": 2587 }, { "epoch": 8.924137931034483, "grad_norm": 1.1712700128555298, "learning_rate": 1.0696551724137931e-05, "loss": 0.3619, "step": 2588 }, { "epoch": 8.927586206896551, "grad_norm": 1.9474700689315796, "learning_rate": 1.0700689655172415e-05, "loss": 0.3524, "step": 2589 }, { "epoch": 8.931034482758621, "grad_norm": 1.3340846300125122, "learning_rate": 1.0704827586206897e-05, "loss": 0.3875, "step": 2590 }, { "epoch": 8.934482758620689, "grad_norm": 1.5760085582733154, "learning_rate": 1.070896551724138e-05, "loss": 0.3857, "step": 2591 }, { "epoch": 8.937931034482759, "grad_norm": 2.303152322769165, "learning_rate": 1.0713103448275862e-05, "loss": 0.3784, "step": 2592 }, { "epoch": 8.941379310344828, "grad_norm": 1.531446933746338, "learning_rate": 1.0717241379310346e-05, "loss": 0.3943, "step": 2593 }, { "epoch": 8.944827586206896, "grad_norm": 2.335784435272217, "learning_rate": 1.0721379310344826e-05, "loss": 0.4564, "step": 2594 }, { "epoch": 8.948275862068966, "grad_norm": 2.0438637733459473, "learning_rate": 1.072551724137931e-05, "loss": 0.5797, "step": 2595 }, { "epoch": 8.951724137931034, "grad_norm": 1.4370014667510986, "learning_rate": 1.0729655172413792e-05, "loss": 0.4134, "step": 2596 }, { "epoch": 8.955172413793104, "grad_norm": 0.6597642302513123, "learning_rate": 1.0733793103448276e-05, "loss": 0.3608, "step": 2597 }, { "epoch": 8.958620689655172, "grad_norm": 0.993339478969574, "learning_rate": 1.073793103448276e-05, "loss": 0.3531, "step": 2598 }, { "epoch": 8.962068965517242, "grad_norm": 0.7873258590698242, "learning_rate": 1.0742068965517242e-05, "loss": 0.3938, "step": 2599 }, { "epoch": 8.96551724137931, "grad_norm": 0.8977217674255371, "learning_rate": 1.0746206896551725e-05, "loss": 0.3768, "step": 2600 }, { "epoch": 8.96896551724138, "grad_norm": 1.017096996307373, "learning_rate": 1.0750344827586207e-05, "loss": 0.3494, "step": 2601 }, { "epoch": 8.972413793103449, "grad_norm": 1.1403759717941284, "learning_rate": 1.0754482758620691e-05, "loss": 0.3677, "step": 2602 }, { "epoch": 8.975862068965517, "grad_norm": 1.6649240255355835, "learning_rate": 1.0758620689655173e-05, "loss": 0.3623, "step": 2603 }, { "epoch": 8.979310344827587, "grad_norm": 0.9764360785484314, "learning_rate": 1.0762758620689657e-05, "loss": 0.3786, "step": 2604 }, { "epoch": 8.982758620689655, "grad_norm": 1.384561538696289, "learning_rate": 1.0766896551724139e-05, "loss": 0.363, "step": 2605 }, { "epoch": 8.986206896551725, "grad_norm": 1.0612199306488037, "learning_rate": 1.077103448275862e-05, "loss": 0.384, "step": 2606 }, { "epoch": 8.989655172413793, "grad_norm": 2.2521963119506836, "learning_rate": 1.0775172413793103e-05, "loss": 0.4548, "step": 2607 }, { "epoch": 8.993103448275862, "grad_norm": 1.6295661926269531, "learning_rate": 1.0779310344827586e-05, "loss": 0.3697, "step": 2608 }, { "epoch": 8.99655172413793, "grad_norm": 1.4301459789276123, "learning_rate": 1.0783448275862068e-05, "loss": 0.4237, "step": 2609 }, { "epoch": 9.0, "grad_norm": 1.30130136013031, "learning_rate": 1.0787586206896552e-05, "loss": 0.4861, "step": 2610 }, { "epoch": 9.00344827586207, "grad_norm": 1.1227166652679443, "learning_rate": 1.0791724137931034e-05, "loss": 0.4512, "step": 2611 }, { "epoch": 9.006896551724138, "grad_norm": 0.7551761865615845, "learning_rate": 1.0795862068965518e-05, "loss": 0.3687, "step": 2612 }, { "epoch": 9.010344827586207, "grad_norm": 0.8712340593338013, "learning_rate": 1.08e-05, "loss": 0.367, "step": 2613 }, { "epoch": 9.013793103448275, "grad_norm": 0.7783472537994385, "learning_rate": 1.0804137931034484e-05, "loss": 0.3727, "step": 2614 }, { "epoch": 9.017241379310345, "grad_norm": 0.7998786568641663, "learning_rate": 1.0808275862068966e-05, "loss": 0.3458, "step": 2615 }, { "epoch": 9.020689655172413, "grad_norm": 0.8690416216850281, "learning_rate": 1.081241379310345e-05, "loss": 0.4007, "step": 2616 }, { "epoch": 9.024137931034483, "grad_norm": 1.0475691556930542, "learning_rate": 1.0816551724137931e-05, "loss": 0.3785, "step": 2617 }, { "epoch": 9.027586206896551, "grad_norm": 0.9915716052055359, "learning_rate": 1.0820689655172415e-05, "loss": 0.3659, "step": 2618 }, { "epoch": 9.03103448275862, "grad_norm": 0.7897497415542603, "learning_rate": 1.0824827586206895e-05, "loss": 0.3639, "step": 2619 }, { "epoch": 9.03448275862069, "grad_norm": 1.0591931343078613, "learning_rate": 1.0828965517241379e-05, "loss": 0.3372, "step": 2620 }, { "epoch": 9.037931034482758, "grad_norm": 0.990010678768158, "learning_rate": 1.0833103448275861e-05, "loss": 0.3527, "step": 2621 }, { "epoch": 9.041379310344828, "grad_norm": 1.098168134689331, "learning_rate": 1.0837241379310345e-05, "loss": 0.352, "step": 2622 }, { "epoch": 9.044827586206896, "grad_norm": 1.123281478881836, "learning_rate": 1.0841379310344827e-05, "loss": 0.3377, "step": 2623 }, { "epoch": 9.048275862068966, "grad_norm": 1.2124539613723755, "learning_rate": 1.084551724137931e-05, "loss": 0.3614, "step": 2624 }, { "epoch": 9.051724137931034, "grad_norm": 1.0148292779922485, "learning_rate": 1.0849655172413793e-05, "loss": 0.3364, "step": 2625 }, { "epoch": 9.055172413793104, "grad_norm": 1.0587658882141113, "learning_rate": 1.0853793103448276e-05, "loss": 0.3438, "step": 2626 }, { "epoch": 9.058620689655172, "grad_norm": 1.4001460075378418, "learning_rate": 1.0857931034482758e-05, "loss": 0.3674, "step": 2627 }, { "epoch": 9.062068965517241, "grad_norm": 1.01576828956604, "learning_rate": 1.0862068965517242e-05, "loss": 0.3573, "step": 2628 }, { "epoch": 9.065517241379311, "grad_norm": 1.2916510105133057, "learning_rate": 1.0866206896551724e-05, "loss": 0.429, "step": 2629 }, { "epoch": 9.068965517241379, "grad_norm": 20.633525848388672, "learning_rate": 1.0870344827586208e-05, "loss": 0.4424, "step": 2630 }, { "epoch": 9.072413793103449, "grad_norm": 2.027778148651123, "learning_rate": 1.0874482758620692e-05, "loss": 0.4373, "step": 2631 }, { "epoch": 9.075862068965517, "grad_norm": 1.1945964097976685, "learning_rate": 1.0878620689655172e-05, "loss": 0.3841, "step": 2632 }, { "epoch": 9.079310344827586, "grad_norm": 1.1440244913101196, "learning_rate": 1.0882758620689656e-05, "loss": 0.3854, "step": 2633 }, { "epoch": 9.082758620689654, "grad_norm": 7.802204608917236, "learning_rate": 1.0886896551724138e-05, "loss": 0.4265, "step": 2634 }, { "epoch": 9.086206896551724, "grad_norm": 2.0852158069610596, "learning_rate": 1.0891034482758621e-05, "loss": 0.4516, "step": 2635 }, { "epoch": 9.089655172413794, "grad_norm": 1.3060754537582397, "learning_rate": 1.0895172413793103e-05, "loss": 0.4152, "step": 2636 }, { "epoch": 9.093103448275862, "grad_norm": 0.9559107422828674, "learning_rate": 1.0899310344827587e-05, "loss": 0.3731, "step": 2637 }, { "epoch": 9.096551724137932, "grad_norm": 0.8162037134170532, "learning_rate": 1.0903448275862069e-05, "loss": 0.3565, "step": 2638 }, { "epoch": 9.1, "grad_norm": 1.0467230081558228, "learning_rate": 1.0907586206896553e-05, "loss": 0.375, "step": 2639 }, { "epoch": 9.10344827586207, "grad_norm": 1.1144274473190308, "learning_rate": 1.0911724137931035e-05, "loss": 0.3735, "step": 2640 }, { "epoch": 9.106896551724137, "grad_norm": 0.8862094283103943, "learning_rate": 1.0915862068965518e-05, "loss": 0.3552, "step": 2641 }, { "epoch": 9.110344827586207, "grad_norm": 1.1491988897323608, "learning_rate": 1.092e-05, "loss": 0.3639, "step": 2642 }, { "epoch": 9.113793103448275, "grad_norm": 1.1531528234481812, "learning_rate": 1.0924137931034484e-05, "loss": 0.3808, "step": 2643 }, { "epoch": 9.117241379310345, "grad_norm": 1.0016958713531494, "learning_rate": 1.0928275862068966e-05, "loss": 0.3734, "step": 2644 }, { "epoch": 9.120689655172415, "grad_norm": 0.907098650932312, "learning_rate": 1.0932413793103448e-05, "loss": 0.3727, "step": 2645 }, { "epoch": 9.124137931034483, "grad_norm": 1.0253077745437622, "learning_rate": 1.093655172413793e-05, "loss": 0.3671, "step": 2646 }, { "epoch": 9.127586206896552, "grad_norm": 1.5086324214935303, "learning_rate": 1.0940689655172414e-05, "loss": 0.3545, "step": 2647 }, { "epoch": 9.13103448275862, "grad_norm": 1.0487312078475952, "learning_rate": 1.0944827586206896e-05, "loss": 0.3579, "step": 2648 }, { "epoch": 9.13448275862069, "grad_norm": 0.918933629989624, "learning_rate": 1.094896551724138e-05, "loss": 0.3328, "step": 2649 }, { "epoch": 9.137931034482758, "grad_norm": 1.0756975412368774, "learning_rate": 1.0953103448275862e-05, "loss": 0.4086, "step": 2650 }, { "epoch": 9.141379310344828, "grad_norm": 1.1035444736480713, "learning_rate": 1.0957241379310345e-05, "loss": 0.3826, "step": 2651 }, { "epoch": 9.144827586206896, "grad_norm": 1.1974966526031494, "learning_rate": 1.0961379310344827e-05, "loss": 0.3679, "step": 2652 }, { "epoch": 9.148275862068965, "grad_norm": 1.4653823375701904, "learning_rate": 1.0965517241379311e-05, "loss": 0.3578, "step": 2653 }, { "epoch": 9.151724137931035, "grad_norm": 1.0829731225967407, "learning_rate": 1.0969655172413793e-05, "loss": 0.3356, "step": 2654 }, { "epoch": 9.155172413793103, "grad_norm": 1.151671290397644, "learning_rate": 1.0973793103448277e-05, "loss": 0.3709, "step": 2655 }, { "epoch": 9.158620689655173, "grad_norm": 1.3670474290847778, "learning_rate": 1.0977931034482759e-05, "loss": 0.3662, "step": 2656 }, { "epoch": 9.162068965517241, "grad_norm": 1.4510750770568848, "learning_rate": 1.0982068965517243e-05, "loss": 0.4376, "step": 2657 }, { "epoch": 9.16551724137931, "grad_norm": 1.3292996883392334, "learning_rate": 1.0986206896551723e-05, "loss": 0.4517, "step": 2658 }, { "epoch": 9.168965517241379, "grad_norm": 1.7425565719604492, "learning_rate": 1.0990344827586207e-05, "loss": 0.4485, "step": 2659 }, { "epoch": 9.172413793103448, "grad_norm": 1.852247953414917, "learning_rate": 1.0994482758620689e-05, "loss": 0.5875, "step": 2660 }, { "epoch": 9.175862068965516, "grad_norm": 0.7688723802566528, "learning_rate": 1.0998620689655172e-05, "loss": 0.4359, "step": 2661 }, { "epoch": 9.179310344827586, "grad_norm": 0.8118736147880554, "learning_rate": 1.1002758620689654e-05, "loss": 0.4223, "step": 2662 }, { "epoch": 9.182758620689656, "grad_norm": 0.9218946695327759, "learning_rate": 1.1006896551724138e-05, "loss": 0.3914, "step": 2663 }, { "epoch": 9.186206896551724, "grad_norm": 1.1600404977798462, "learning_rate": 1.1011034482758622e-05, "loss": 0.341, "step": 2664 }, { "epoch": 9.189655172413794, "grad_norm": 0.8117491006851196, "learning_rate": 1.1015172413793104e-05, "loss": 0.3528, "step": 2665 }, { "epoch": 9.193103448275862, "grad_norm": 1.1758906841278076, "learning_rate": 1.1019310344827588e-05, "loss": 0.3486, "step": 2666 }, { "epoch": 9.196551724137931, "grad_norm": 0.8649953603744507, "learning_rate": 1.102344827586207e-05, "loss": 0.3601, "step": 2667 }, { "epoch": 9.2, "grad_norm": 1.0008772611618042, "learning_rate": 1.1027586206896553e-05, "loss": 0.3719, "step": 2668 }, { "epoch": 9.203448275862069, "grad_norm": 3.2956454753875732, "learning_rate": 1.1031724137931035e-05, "loss": 0.3451, "step": 2669 }, { "epoch": 9.206896551724139, "grad_norm": 0.9921894669532776, "learning_rate": 1.1035862068965519e-05, "loss": 0.3683, "step": 2670 }, { "epoch": 9.210344827586207, "grad_norm": 0.9086856245994568, "learning_rate": 1.104e-05, "loss": 0.377, "step": 2671 }, { "epoch": 9.213793103448277, "grad_norm": 0.8611367344856262, "learning_rate": 1.1044137931034483e-05, "loss": 0.3525, "step": 2672 }, { "epoch": 9.217241379310344, "grad_norm": 1.2400901317596436, "learning_rate": 1.1048275862068965e-05, "loss": 0.3627, "step": 2673 }, { "epoch": 9.220689655172414, "grad_norm": 0.9769788980484009, "learning_rate": 1.1052413793103449e-05, "loss": 0.3518, "step": 2674 }, { "epoch": 9.224137931034482, "grad_norm": 1.552931308746338, "learning_rate": 1.105655172413793e-05, "loss": 0.3653, "step": 2675 }, { "epoch": 9.227586206896552, "grad_norm": 1.1220771074295044, "learning_rate": 1.1060689655172415e-05, "loss": 0.3374, "step": 2676 }, { "epoch": 9.23103448275862, "grad_norm": 3.4549763202667236, "learning_rate": 1.1064827586206897e-05, "loss": 0.3401, "step": 2677 }, { "epoch": 9.23448275862069, "grad_norm": 1.5400334596633911, "learning_rate": 1.106896551724138e-05, "loss": 0.362, "step": 2678 }, { "epoch": 9.23793103448276, "grad_norm": 1.497214674949646, "learning_rate": 1.1073103448275862e-05, "loss": 0.3649, "step": 2679 }, { "epoch": 9.241379310344827, "grad_norm": 1.2227965593338013, "learning_rate": 1.1077241379310346e-05, "loss": 0.3773, "step": 2680 }, { "epoch": 9.244827586206897, "grad_norm": 1.6808533668518066, "learning_rate": 1.1081379310344828e-05, "loss": 0.3897, "step": 2681 }, { "epoch": 9.248275862068965, "grad_norm": 1.0694248676300049, "learning_rate": 1.1085517241379312e-05, "loss": 0.3664, "step": 2682 }, { "epoch": 9.251724137931035, "grad_norm": 1.089974284172058, "learning_rate": 1.1089655172413794e-05, "loss": 0.4079, "step": 2683 }, { "epoch": 9.255172413793103, "grad_norm": 1.2185485363006592, "learning_rate": 1.1093793103448276e-05, "loss": 0.4151, "step": 2684 }, { "epoch": 9.258620689655173, "grad_norm": 5.120865821838379, "learning_rate": 1.1097931034482758e-05, "loss": 0.5807, "step": 2685 }, { "epoch": 9.26206896551724, "grad_norm": 0.8112298250198364, "learning_rate": 1.1102068965517241e-05, "loss": 0.425, "step": 2686 }, { "epoch": 9.26551724137931, "grad_norm": 1.2661129236221313, "learning_rate": 1.1106206896551723e-05, "loss": 0.3292, "step": 2687 }, { "epoch": 9.26896551724138, "grad_norm": 0.8307352066040039, "learning_rate": 1.1110344827586207e-05, "loss": 0.4046, "step": 2688 }, { "epoch": 9.272413793103448, "grad_norm": 0.9375404715538025, "learning_rate": 1.111448275862069e-05, "loss": 0.3437, "step": 2689 }, { "epoch": 9.275862068965518, "grad_norm": 1.0203908681869507, "learning_rate": 1.1118620689655173e-05, "loss": 0.3611, "step": 2690 }, { "epoch": 9.279310344827586, "grad_norm": 0.9415742754936218, "learning_rate": 1.1122758620689655e-05, "loss": 0.3493, "step": 2691 }, { "epoch": 9.282758620689656, "grad_norm": 2.1167044639587402, "learning_rate": 1.1126896551724139e-05, "loss": 0.3504, "step": 2692 }, { "epoch": 9.286206896551723, "grad_norm": 0.9034416675567627, "learning_rate": 1.113103448275862e-05, "loss": 0.4059, "step": 2693 }, { "epoch": 9.289655172413793, "grad_norm": 1.0277414321899414, "learning_rate": 1.1135172413793104e-05, "loss": 0.3571, "step": 2694 }, { "epoch": 9.293103448275861, "grad_norm": 1.078853726387024, "learning_rate": 1.1139310344827586e-05, "loss": 0.3295, "step": 2695 }, { "epoch": 9.296551724137931, "grad_norm": 0.8033673763275146, "learning_rate": 1.114344827586207e-05, "loss": 0.3287, "step": 2696 }, { "epoch": 9.3, "grad_norm": 1.0420211553573608, "learning_rate": 1.1147586206896552e-05, "loss": 0.3681, "step": 2697 }, { "epoch": 9.303448275862069, "grad_norm": 0.8639684319496155, "learning_rate": 1.1151724137931034e-05, "loss": 0.3258, "step": 2698 }, { "epoch": 9.306896551724138, "grad_norm": 0.9173340201377869, "learning_rate": 1.1155862068965518e-05, "loss": 0.3677, "step": 2699 }, { "epoch": 9.310344827586206, "grad_norm": 1.0880295038223267, "learning_rate": 1.116e-05, "loss": 0.3907, "step": 2700 }, { "epoch": 9.313793103448276, "grad_norm": 0.9087486267089844, "learning_rate": 1.1164137931034484e-05, "loss": 0.3633, "step": 2701 }, { "epoch": 9.317241379310344, "grad_norm": 1.0112422704696655, "learning_rate": 1.1168275862068966e-05, "loss": 0.3495, "step": 2702 }, { "epoch": 9.320689655172414, "grad_norm": 1.2279645204544067, "learning_rate": 1.117241379310345e-05, "loss": 0.405, "step": 2703 }, { "epoch": 9.324137931034482, "grad_norm": 1.073796272277832, "learning_rate": 1.1176551724137931e-05, "loss": 0.3651, "step": 2704 }, { "epoch": 9.327586206896552, "grad_norm": 1.4354662895202637, "learning_rate": 1.1180689655172415e-05, "loss": 0.4092, "step": 2705 }, { "epoch": 9.331034482758621, "grad_norm": 1.5754289627075195, "learning_rate": 1.1184827586206897e-05, "loss": 0.4054, "step": 2706 }, { "epoch": 9.33448275862069, "grad_norm": 1.3705780506134033, "learning_rate": 1.118896551724138e-05, "loss": 0.3644, "step": 2707 }, { "epoch": 9.337931034482759, "grad_norm": 1.3612403869628906, "learning_rate": 1.1193103448275863e-05, "loss": 0.3821, "step": 2708 }, { "epoch": 9.341379310344827, "grad_norm": 3.75003981590271, "learning_rate": 1.1197241379310345e-05, "loss": 0.4349, "step": 2709 }, { "epoch": 9.344827586206897, "grad_norm": 1.6917484998703003, "learning_rate": 1.1201379310344827e-05, "loss": 0.5791, "step": 2710 }, { "epoch": 9.348275862068965, "grad_norm": 0.9774472713470459, "learning_rate": 1.120551724137931e-05, "loss": 0.3868, "step": 2711 }, { "epoch": 9.351724137931035, "grad_norm": 0.7153780460357666, "learning_rate": 1.1209655172413793e-05, "loss": 0.3355, "step": 2712 }, { "epoch": 9.355172413793104, "grad_norm": 0.730926513671875, "learning_rate": 1.1213793103448276e-05, "loss": 0.3942, "step": 2713 }, { "epoch": 9.358620689655172, "grad_norm": 1.2036350965499878, "learning_rate": 1.1217931034482758e-05, "loss": 0.3959, "step": 2714 }, { "epoch": 9.362068965517242, "grad_norm": 0.8444347381591797, "learning_rate": 1.1222068965517242e-05, "loss": 0.341, "step": 2715 }, { "epoch": 9.36551724137931, "grad_norm": 0.7493941783905029, "learning_rate": 1.1226206896551724e-05, "loss": 0.3292, "step": 2716 }, { "epoch": 9.36896551724138, "grad_norm": 0.8206502199172974, "learning_rate": 1.1230344827586208e-05, "loss": 0.3762, "step": 2717 }, { "epoch": 9.372413793103448, "grad_norm": 0.7898188233375549, "learning_rate": 1.123448275862069e-05, "loss": 0.324, "step": 2718 }, { "epoch": 9.375862068965517, "grad_norm": 1.0822956562042236, "learning_rate": 1.1238620689655173e-05, "loss": 0.3557, "step": 2719 }, { "epoch": 9.379310344827585, "grad_norm": 0.9981796741485596, "learning_rate": 1.1242758620689655e-05, "loss": 0.3549, "step": 2720 }, { "epoch": 9.382758620689655, "grad_norm": 0.9867526292800903, "learning_rate": 1.124689655172414e-05, "loss": 0.3702, "step": 2721 }, { "epoch": 9.386206896551725, "grad_norm": 1.067359447479248, "learning_rate": 1.125103448275862e-05, "loss": 0.3588, "step": 2722 }, { "epoch": 9.389655172413793, "grad_norm": 0.8762134909629822, "learning_rate": 1.1255172413793103e-05, "loss": 0.3526, "step": 2723 }, { "epoch": 9.393103448275863, "grad_norm": 0.9990464448928833, "learning_rate": 1.1259310344827585e-05, "loss": 0.3251, "step": 2724 }, { "epoch": 9.39655172413793, "grad_norm": 1.046981930732727, "learning_rate": 1.1263448275862069e-05, "loss": 0.405, "step": 2725 }, { "epoch": 9.4, "grad_norm": 1.3892391920089722, "learning_rate": 1.1267586206896551e-05, "loss": 0.3484, "step": 2726 }, { "epoch": 9.403448275862068, "grad_norm": 2.104858875274658, "learning_rate": 1.1271724137931035e-05, "loss": 0.3467, "step": 2727 }, { "epoch": 9.406896551724138, "grad_norm": 1.097878098487854, "learning_rate": 1.1275862068965517e-05, "loss": 0.3363, "step": 2728 }, { "epoch": 9.410344827586206, "grad_norm": 0.9573140144348145, "learning_rate": 1.128e-05, "loss": 0.3841, "step": 2729 }, { "epoch": 9.413793103448276, "grad_norm": 1.0886683464050293, "learning_rate": 1.1284137931034482e-05, "loss": 0.3546, "step": 2730 }, { "epoch": 9.417241379310346, "grad_norm": 1.677398443222046, "learning_rate": 1.1288275862068966e-05, "loss": 0.4316, "step": 2731 }, { "epoch": 9.420689655172414, "grad_norm": 1.1314175128936768, "learning_rate": 1.129241379310345e-05, "loss": 0.3463, "step": 2732 }, { "epoch": 9.424137931034483, "grad_norm": 1.589165210723877, "learning_rate": 1.1296551724137932e-05, "loss": 0.4315, "step": 2733 }, { "epoch": 9.427586206896551, "grad_norm": 1.6072883605957031, "learning_rate": 1.1300689655172416e-05, "loss": 0.3671, "step": 2734 }, { "epoch": 9.431034482758621, "grad_norm": 2.581719160079956, "learning_rate": 1.1304827586206896e-05, "loss": 0.6252, "step": 2735 }, { "epoch": 9.434482758620689, "grad_norm": 0.8643123507499695, "learning_rate": 1.130896551724138e-05, "loss": 0.382, "step": 2736 }, { "epoch": 9.437931034482759, "grad_norm": 0.6814752817153931, "learning_rate": 1.1313103448275862e-05, "loss": 0.3626, "step": 2737 }, { "epoch": 9.441379310344828, "grad_norm": 1.0593551397323608, "learning_rate": 1.1317241379310345e-05, "loss": 0.4064, "step": 2738 }, { "epoch": 9.444827586206896, "grad_norm": 0.8290854096412659, "learning_rate": 1.1321379310344827e-05, "loss": 0.3533, "step": 2739 }, { "epoch": 9.448275862068966, "grad_norm": 0.7370584607124329, "learning_rate": 1.1325517241379311e-05, "loss": 0.4009, "step": 2740 }, { "epoch": 9.451724137931034, "grad_norm": 0.9262087345123291, "learning_rate": 1.1329655172413793e-05, "loss": 0.3543, "step": 2741 }, { "epoch": 9.455172413793104, "grad_norm": 0.8979434967041016, "learning_rate": 1.1333793103448277e-05, "loss": 0.3765, "step": 2742 }, { "epoch": 9.458620689655172, "grad_norm": 1.0919711589813232, "learning_rate": 1.1337931034482759e-05, "loss": 0.3578, "step": 2743 }, { "epoch": 9.462068965517242, "grad_norm": 0.8398333787918091, "learning_rate": 1.1342068965517243e-05, "loss": 0.3535, "step": 2744 }, { "epoch": 9.46551724137931, "grad_norm": 1.7289808988571167, "learning_rate": 1.1346206896551725e-05, "loss": 0.3519, "step": 2745 }, { "epoch": 9.46896551724138, "grad_norm": 1.1360574960708618, "learning_rate": 1.1350344827586208e-05, "loss": 0.3812, "step": 2746 }, { "epoch": 9.472413793103449, "grad_norm": 1.0475621223449707, "learning_rate": 1.135448275862069e-05, "loss": 0.3898, "step": 2747 }, { "epoch": 9.475862068965517, "grad_norm": 1.0689212083816528, "learning_rate": 1.1358620689655172e-05, "loss": 0.3605, "step": 2748 }, { "epoch": 9.479310344827587, "grad_norm": 0.8115873336791992, "learning_rate": 1.1362758620689654e-05, "loss": 0.3565, "step": 2749 }, { "epoch": 9.482758620689655, "grad_norm": 0.9174138307571411, "learning_rate": 1.1366896551724138e-05, "loss": 0.374, "step": 2750 }, { "epoch": 9.486206896551725, "grad_norm": 1.2235949039459229, "learning_rate": 1.137103448275862e-05, "loss": 0.391, "step": 2751 }, { "epoch": 9.489655172413793, "grad_norm": 0.9141895174980164, "learning_rate": 1.1375172413793104e-05, "loss": 0.413, "step": 2752 }, { "epoch": 9.493103448275862, "grad_norm": 0.9754385948181152, "learning_rate": 1.1379310344827586e-05, "loss": 0.3695, "step": 2753 }, { "epoch": 9.49655172413793, "grad_norm": 0.8715189695358276, "learning_rate": 1.138344827586207e-05, "loss": 0.3319, "step": 2754 }, { "epoch": 9.5, "grad_norm": 1.2465013265609741, "learning_rate": 1.1387586206896552e-05, "loss": 0.386, "step": 2755 }, { "epoch": 9.50344827586207, "grad_norm": 1.0897799730300903, "learning_rate": 1.1391724137931035e-05, "loss": 0.3544, "step": 2756 }, { "epoch": 9.506896551724138, "grad_norm": 1.9236927032470703, "learning_rate": 1.1395862068965517e-05, "loss": 0.3936, "step": 2757 }, { "epoch": 9.510344827586207, "grad_norm": 1.3889343738555908, "learning_rate": 1.1400000000000001e-05, "loss": 0.409, "step": 2758 }, { "epoch": 9.513793103448275, "grad_norm": 2.211074113845825, "learning_rate": 1.1404137931034483e-05, "loss": 0.4552, "step": 2759 }, { "epoch": 9.517241379310345, "grad_norm": 3.984074592590332, "learning_rate": 1.1408275862068967e-05, "loss": 0.5498, "step": 2760 }, { "epoch": 9.520689655172413, "grad_norm": 0.8677812814712524, "learning_rate": 1.1412413793103447e-05, "loss": 0.394, "step": 2761 }, { "epoch": 9.524137931034483, "grad_norm": 0.914909303188324, "learning_rate": 1.141655172413793e-05, "loss": 0.4299, "step": 2762 }, { "epoch": 9.527586206896551, "grad_norm": 0.821894645690918, "learning_rate": 1.1420689655172413e-05, "loss": 0.3292, "step": 2763 }, { "epoch": 9.53103448275862, "grad_norm": 0.7621665000915527, "learning_rate": 1.1424827586206896e-05, "loss": 0.3573, "step": 2764 }, { "epoch": 9.53448275862069, "grad_norm": 1.0199958086013794, "learning_rate": 1.142896551724138e-05, "loss": 0.3456, "step": 2765 }, { "epoch": 9.537931034482758, "grad_norm": 0.9368630051612854, "learning_rate": 1.1433103448275862e-05, "loss": 0.3618, "step": 2766 }, { "epoch": 9.541379310344828, "grad_norm": 1.0316123962402344, "learning_rate": 1.1437241379310346e-05, "loss": 0.3683, "step": 2767 }, { "epoch": 9.544827586206896, "grad_norm": 1.1903069019317627, "learning_rate": 1.1441379310344828e-05, "loss": 0.3734, "step": 2768 }, { "epoch": 9.548275862068966, "grad_norm": 0.7898816466331482, "learning_rate": 1.1445517241379312e-05, "loss": 0.2841, "step": 2769 }, { "epoch": 9.551724137931034, "grad_norm": 0.9634523987770081, "learning_rate": 1.1449655172413794e-05, "loss": 0.3486, "step": 2770 }, { "epoch": 9.555172413793104, "grad_norm": 2.0639216899871826, "learning_rate": 1.1453793103448277e-05, "loss": 0.3458, "step": 2771 }, { "epoch": 9.558620689655172, "grad_norm": 1.0369771718978882, "learning_rate": 1.145793103448276e-05, "loss": 0.4092, "step": 2772 }, { "epoch": 9.562068965517241, "grad_norm": 1.6770331859588623, "learning_rate": 1.1462068965517243e-05, "loss": 0.3612, "step": 2773 }, { "epoch": 9.565517241379311, "grad_norm": 1.0578560829162598, "learning_rate": 1.1466206896551723e-05, "loss": 0.3812, "step": 2774 }, { "epoch": 9.568965517241379, "grad_norm": 0.9345567226409912, "learning_rate": 1.1470344827586207e-05, "loss": 0.3774, "step": 2775 }, { "epoch": 9.572413793103449, "grad_norm": 1.3261594772338867, "learning_rate": 1.1474482758620689e-05, "loss": 0.3783, "step": 2776 }, { "epoch": 9.575862068965517, "grad_norm": 1.0963220596313477, "learning_rate": 1.1478620689655173e-05, "loss": 0.3461, "step": 2777 }, { "epoch": 9.579310344827586, "grad_norm": 1.8499258756637573, "learning_rate": 1.1482758620689655e-05, "loss": 0.3682, "step": 2778 }, { "epoch": 9.582758620689654, "grad_norm": 1.0627328157424927, "learning_rate": 1.1486896551724139e-05, "loss": 0.3538, "step": 2779 }, { "epoch": 9.586206896551724, "grad_norm": 1.3142337799072266, "learning_rate": 1.149103448275862e-05, "loss": 0.3215, "step": 2780 }, { "epoch": 9.589655172413792, "grad_norm": 7.2237396240234375, "learning_rate": 1.1495172413793104e-05, "loss": 0.4209, "step": 2781 }, { "epoch": 9.593103448275862, "grad_norm": 1.39659583568573, "learning_rate": 1.1499310344827586e-05, "loss": 0.3589, "step": 2782 }, { "epoch": 9.596551724137932, "grad_norm": 2.7622528076171875, "learning_rate": 1.150344827586207e-05, "loss": 0.4059, "step": 2783 }, { "epoch": 9.6, "grad_norm": 1.940638780593872, "learning_rate": 1.1507586206896552e-05, "loss": 0.4639, "step": 2784 }, { "epoch": 9.60344827586207, "grad_norm": 2.119779586791992, "learning_rate": 1.1511724137931036e-05, "loss": 0.564, "step": 2785 }, { "epoch": 9.606896551724137, "grad_norm": 0.7693087458610535, "learning_rate": 1.1515862068965518e-05, "loss": 0.3712, "step": 2786 }, { "epoch": 9.610344827586207, "grad_norm": 0.8780040740966797, "learning_rate": 1.152e-05, "loss": 0.3651, "step": 2787 }, { "epoch": 9.613793103448275, "grad_norm": 0.8970050811767578, "learning_rate": 1.1524137931034482e-05, "loss": 0.3589, "step": 2788 }, { "epoch": 9.617241379310345, "grad_norm": 0.9657955765724182, "learning_rate": 1.1528275862068966e-05, "loss": 0.3509, "step": 2789 }, { "epoch": 9.620689655172415, "grad_norm": 1.2372454404830933, "learning_rate": 1.1532413793103448e-05, "loss": 0.3585, "step": 2790 }, { "epoch": 9.624137931034483, "grad_norm": 0.7686637043952942, "learning_rate": 1.1536551724137931e-05, "loss": 0.3567, "step": 2791 }, { "epoch": 9.627586206896552, "grad_norm": 0.9015601873397827, "learning_rate": 1.1540689655172413e-05, "loss": 0.3721, "step": 2792 }, { "epoch": 9.63103448275862, "grad_norm": 0.8420695066452026, "learning_rate": 1.1544827586206897e-05, "loss": 0.3548, "step": 2793 }, { "epoch": 9.63448275862069, "grad_norm": 0.8683902025222778, "learning_rate": 1.1548965517241379e-05, "loss": 0.376, "step": 2794 }, { "epoch": 9.637931034482758, "grad_norm": 0.9175878763198853, "learning_rate": 1.1553103448275863e-05, "loss": 0.3548, "step": 2795 }, { "epoch": 9.641379310344828, "grad_norm": 1.144668698310852, "learning_rate": 1.1557241379310345e-05, "loss": 0.4066, "step": 2796 }, { "epoch": 9.644827586206896, "grad_norm": 0.8877411484718323, "learning_rate": 1.1561379310344828e-05, "loss": 0.366, "step": 2797 }, { "epoch": 9.648275862068965, "grad_norm": 1.0992878675460815, "learning_rate": 1.1565517241379312e-05, "loss": 0.361, "step": 2798 }, { "epoch": 9.651724137931035, "grad_norm": 1.031711220741272, "learning_rate": 1.1569655172413793e-05, "loss": 0.3547, "step": 2799 }, { "epoch": 9.655172413793103, "grad_norm": 1.344602346420288, "learning_rate": 1.1573793103448276e-05, "loss": 0.365, "step": 2800 }, { "epoch": 9.658620689655173, "grad_norm": 1.1852033138275146, "learning_rate": 1.1577931034482758e-05, "loss": 0.3888, "step": 2801 }, { "epoch": 9.662068965517241, "grad_norm": 1.2532734870910645, "learning_rate": 1.1582068965517242e-05, "loss": 0.3505, "step": 2802 }, { "epoch": 9.66551724137931, "grad_norm": 1.0788154602050781, "learning_rate": 1.1586206896551724e-05, "loss": 0.3557, "step": 2803 }, { "epoch": 9.668965517241379, "grad_norm": 1.4847811460494995, "learning_rate": 1.1590344827586208e-05, "loss": 0.3829, "step": 2804 }, { "epoch": 9.672413793103448, "grad_norm": 2.4749503135681152, "learning_rate": 1.159448275862069e-05, "loss": 0.3624, "step": 2805 }, { "epoch": 9.675862068965518, "grad_norm": 1.4022626876831055, "learning_rate": 1.1598620689655173e-05, "loss": 0.3679, "step": 2806 }, { "epoch": 9.679310344827586, "grad_norm": 1.094169020652771, "learning_rate": 1.1602758620689655e-05, "loss": 0.3463, "step": 2807 }, { "epoch": 9.682758620689656, "grad_norm": 1.6166776418685913, "learning_rate": 1.1606896551724139e-05, "loss": 0.385, "step": 2808 }, { "epoch": 9.686206896551724, "grad_norm": 2.005173683166504, "learning_rate": 1.1611034482758621e-05, "loss": 0.4325, "step": 2809 }, { "epoch": 9.689655172413794, "grad_norm": 2.023963212966919, "learning_rate": 1.1615172413793105e-05, "loss": 0.5835, "step": 2810 }, { "epoch": 9.693103448275862, "grad_norm": 0.9433718919754028, "learning_rate": 1.1619310344827587e-05, "loss": 0.3847, "step": 2811 }, { "epoch": 9.696551724137931, "grad_norm": 0.7635188102722168, "learning_rate": 1.1623448275862069e-05, "loss": 0.362, "step": 2812 }, { "epoch": 9.7, "grad_norm": 0.9807383418083191, "learning_rate": 1.1627586206896551e-05, "loss": 0.3587, "step": 2813 }, { "epoch": 9.703448275862069, "grad_norm": 0.9792896509170532, "learning_rate": 1.1631724137931035e-05, "loss": 0.3506, "step": 2814 }, { "epoch": 9.706896551724139, "grad_norm": 0.8801224231719971, "learning_rate": 1.1635862068965517e-05, "loss": 0.3712, "step": 2815 }, { "epoch": 9.710344827586207, "grad_norm": 0.8187879323959351, "learning_rate": 1.164e-05, "loss": 0.3645, "step": 2816 }, { "epoch": 9.713793103448277, "grad_norm": 0.8136239051818848, "learning_rate": 1.1644137931034482e-05, "loss": 0.3331, "step": 2817 }, { "epoch": 9.717241379310344, "grad_norm": 1.1139384508132935, "learning_rate": 1.1648275862068966e-05, "loss": 0.337, "step": 2818 }, { "epoch": 9.720689655172414, "grad_norm": 0.9043321013450623, "learning_rate": 1.1652413793103448e-05, "loss": 0.3403, "step": 2819 }, { "epoch": 9.724137931034482, "grad_norm": 0.9724398255348206, "learning_rate": 1.1656551724137932e-05, "loss": 0.3796, "step": 2820 }, { "epoch": 9.727586206896552, "grad_norm": 0.9484009146690369, "learning_rate": 1.1660689655172414e-05, "loss": 0.3676, "step": 2821 }, { "epoch": 9.73103448275862, "grad_norm": 1.5304434299468994, "learning_rate": 1.1664827586206898e-05, "loss": 0.3736, "step": 2822 }, { "epoch": 9.73448275862069, "grad_norm": 1.349335789680481, "learning_rate": 1.166896551724138e-05, "loss": 0.3552, "step": 2823 }, { "epoch": 9.73793103448276, "grad_norm": 1.2280821800231934, "learning_rate": 1.1673103448275863e-05, "loss": 0.3596, "step": 2824 }, { "epoch": 9.741379310344827, "grad_norm": 1.0674159526824951, "learning_rate": 1.1677241379310344e-05, "loss": 0.384, "step": 2825 }, { "epoch": 9.744827586206897, "grad_norm": 1.1589322090148926, "learning_rate": 1.1681379310344827e-05, "loss": 0.3468, "step": 2826 }, { "epoch": 9.748275862068965, "grad_norm": 1.3592818975448608, "learning_rate": 1.168551724137931e-05, "loss": 0.4025, "step": 2827 }, { "epoch": 9.751724137931035, "grad_norm": 0.9628503918647766, "learning_rate": 1.1689655172413793e-05, "loss": 0.3554, "step": 2828 }, { "epoch": 9.755172413793103, "grad_norm": 1.2944971323013306, "learning_rate": 1.1693793103448275e-05, "loss": 0.3865, "step": 2829 }, { "epoch": 9.758620689655173, "grad_norm": 1.0102852582931519, "learning_rate": 1.1697931034482759e-05, "loss": 0.3867, "step": 2830 }, { "epoch": 9.76206896551724, "grad_norm": 1.173721432685852, "learning_rate": 1.1702068965517242e-05, "loss": 0.3827, "step": 2831 }, { "epoch": 9.76551724137931, "grad_norm": 1.4653915166854858, "learning_rate": 1.1706206896551725e-05, "loss": 0.412, "step": 2832 }, { "epoch": 9.76896551724138, "grad_norm": 1.3813472986221313, "learning_rate": 1.1710344827586208e-05, "loss": 0.4448, "step": 2833 }, { "epoch": 9.772413793103448, "grad_norm": 1.8988237380981445, "learning_rate": 1.171448275862069e-05, "loss": 0.3966, "step": 2834 }, { "epoch": 9.775862068965518, "grad_norm": 1.8604599237442017, "learning_rate": 1.1718620689655174e-05, "loss": 0.4572, "step": 2835 }, { "epoch": 9.779310344827586, "grad_norm": 1.16995370388031, "learning_rate": 1.1722758620689656e-05, "loss": 0.4053, "step": 2836 }, { "epoch": 9.782758620689656, "grad_norm": 0.7718579769134521, "learning_rate": 1.172689655172414e-05, "loss": 0.3867, "step": 2837 }, { "epoch": 9.786206896551723, "grad_norm": 0.7744660973548889, "learning_rate": 1.173103448275862e-05, "loss": 0.3639, "step": 2838 }, { "epoch": 9.789655172413793, "grad_norm": 1.2774311304092407, "learning_rate": 1.1735172413793104e-05, "loss": 0.395, "step": 2839 }, { "epoch": 9.793103448275861, "grad_norm": 0.9740704298019409, "learning_rate": 1.1739310344827586e-05, "loss": 0.387, "step": 2840 }, { "epoch": 9.796551724137931, "grad_norm": 0.8155460357666016, "learning_rate": 1.174344827586207e-05, "loss": 0.3492, "step": 2841 }, { "epoch": 9.8, "grad_norm": 0.7927494049072266, "learning_rate": 1.1747586206896551e-05, "loss": 0.3489, "step": 2842 }, { "epoch": 9.803448275862069, "grad_norm": 3.679628610610962, "learning_rate": 1.1751724137931035e-05, "loss": 0.3762, "step": 2843 }, { "epoch": 9.806896551724138, "grad_norm": 0.8711249232292175, "learning_rate": 1.1755862068965517e-05, "loss": 0.3625, "step": 2844 }, { "epoch": 9.810344827586206, "grad_norm": 0.8422684073448181, "learning_rate": 1.1760000000000001e-05, "loss": 0.3236, "step": 2845 }, { "epoch": 9.813793103448276, "grad_norm": 0.9320348501205444, "learning_rate": 1.1764137931034483e-05, "loss": 0.3696, "step": 2846 }, { "epoch": 9.817241379310344, "grad_norm": 0.8216527700424194, "learning_rate": 1.1768275862068967e-05, "loss": 0.3759, "step": 2847 }, { "epoch": 9.820689655172414, "grad_norm": 1.3907221555709839, "learning_rate": 1.1772413793103449e-05, "loss": 0.3813, "step": 2848 }, { "epoch": 9.824137931034482, "grad_norm": 1.1835098266601562, "learning_rate": 1.1776551724137932e-05, "loss": 0.3768, "step": 2849 }, { "epoch": 9.827586206896552, "grad_norm": 1.052245020866394, "learning_rate": 1.1780689655172414e-05, "loss": 0.3828, "step": 2850 }, { "epoch": 9.831034482758621, "grad_norm": 1.0067946910858154, "learning_rate": 1.1784827586206896e-05, "loss": 0.3566, "step": 2851 }, { "epoch": 9.83448275862069, "grad_norm": 0.9768005013465881, "learning_rate": 1.1788965517241378e-05, "loss": 0.3594, "step": 2852 }, { "epoch": 9.837931034482759, "grad_norm": 1.2292876243591309, "learning_rate": 1.1793103448275862e-05, "loss": 0.3585, "step": 2853 }, { "epoch": 9.841379310344827, "grad_norm": 0.9376062750816345, "learning_rate": 1.1797241379310344e-05, "loss": 0.3818, "step": 2854 }, { "epoch": 9.844827586206897, "grad_norm": 1.1601488590240479, "learning_rate": 1.1801379310344828e-05, "loss": 0.3478, "step": 2855 }, { "epoch": 9.848275862068965, "grad_norm": 1.6478581428527832, "learning_rate": 1.180551724137931e-05, "loss": 0.4004, "step": 2856 }, { "epoch": 9.851724137931035, "grad_norm": 1.199449896812439, "learning_rate": 1.1809655172413794e-05, "loss": 0.3753, "step": 2857 }, { "epoch": 9.855172413793104, "grad_norm": 1.848005771636963, "learning_rate": 1.1813793103448276e-05, "loss": 0.4221, "step": 2858 }, { "epoch": 9.858620689655172, "grad_norm": 2.552335023880005, "learning_rate": 1.181793103448276e-05, "loss": 0.4231, "step": 2859 }, { "epoch": 9.862068965517242, "grad_norm": 2.6529369354248047, "learning_rate": 1.1822068965517241e-05, "loss": 0.5838, "step": 2860 }, { "epoch": 9.86551724137931, "grad_norm": 1.3360010385513306, "learning_rate": 1.1826206896551725e-05, "loss": 0.4637, "step": 2861 }, { "epoch": 9.86896551724138, "grad_norm": 0.8899229168891907, "learning_rate": 1.1830344827586207e-05, "loss": 0.3842, "step": 2862 }, { "epoch": 9.872413793103448, "grad_norm": 1.1263372898101807, "learning_rate": 1.183448275862069e-05, "loss": 0.3841, "step": 2863 }, { "epoch": 9.875862068965517, "grad_norm": 0.9636869430541992, "learning_rate": 1.1838620689655173e-05, "loss": 0.3678, "step": 2864 }, { "epoch": 9.879310344827585, "grad_norm": 1.0373361110687256, "learning_rate": 1.1842758620689655e-05, "loss": 0.3823, "step": 2865 }, { "epoch": 9.882758620689655, "grad_norm": 0.8402923345565796, "learning_rate": 1.1846896551724139e-05, "loss": 0.362, "step": 2866 }, { "epoch": 9.886206896551725, "grad_norm": 0.8424697518348694, "learning_rate": 1.185103448275862e-05, "loss": 0.4021, "step": 2867 }, { "epoch": 9.889655172413793, "grad_norm": 0.9928293824195862, "learning_rate": 1.1855172413793104e-05, "loss": 0.3615, "step": 2868 }, { "epoch": 9.893103448275863, "grad_norm": 0.8508875966072083, "learning_rate": 1.1859310344827586e-05, "loss": 0.3488, "step": 2869 }, { "epoch": 9.89655172413793, "grad_norm": 1.1668590307235718, "learning_rate": 1.186344827586207e-05, "loss": 0.3251, "step": 2870 }, { "epoch": 9.9, "grad_norm": 0.8877385258674622, "learning_rate": 1.1867586206896552e-05, "loss": 0.3564, "step": 2871 }, { "epoch": 9.903448275862068, "grad_norm": 0.8677090406417847, "learning_rate": 1.1871724137931036e-05, "loss": 0.3479, "step": 2872 }, { "epoch": 9.906896551724138, "grad_norm": 1.3296489715576172, "learning_rate": 1.1875862068965518e-05, "loss": 0.3632, "step": 2873 }, { "epoch": 9.910344827586208, "grad_norm": 1.1072213649749756, "learning_rate": 1.1880000000000001e-05, "loss": 0.3614, "step": 2874 }, { "epoch": 9.913793103448276, "grad_norm": 1.1449700593948364, "learning_rate": 1.1884137931034483e-05, "loss": 0.3475, "step": 2875 }, { "epoch": 9.917241379310346, "grad_norm": 0.9970369338989258, "learning_rate": 1.1888275862068965e-05, "loss": 0.3266, "step": 2876 }, { "epoch": 9.920689655172414, "grad_norm": 0.8546450138092041, "learning_rate": 1.1892413793103448e-05, "loss": 0.3491, "step": 2877 }, { "epoch": 9.924137931034483, "grad_norm": 1.1341227293014526, "learning_rate": 1.1896551724137931e-05, "loss": 0.3224, "step": 2878 }, { "epoch": 9.927586206896551, "grad_norm": 1.485824465751648, "learning_rate": 1.1900689655172413e-05, "loss": 0.3707, "step": 2879 }, { "epoch": 9.931034482758621, "grad_norm": 1.0073392391204834, "learning_rate": 1.1904827586206897e-05, "loss": 0.334, "step": 2880 }, { "epoch": 9.934482758620689, "grad_norm": 1.4164685010910034, "learning_rate": 1.1908965517241379e-05, "loss": 0.3513, "step": 2881 }, { "epoch": 9.937931034482759, "grad_norm": 1.2483446598052979, "learning_rate": 1.1913103448275863e-05, "loss": 0.3801, "step": 2882 }, { "epoch": 9.941379310344828, "grad_norm": 2.2597620487213135, "learning_rate": 1.1917241379310345e-05, "loss": 0.3844, "step": 2883 }, { "epoch": 9.944827586206896, "grad_norm": 1.6269371509552002, "learning_rate": 1.1921379310344828e-05, "loss": 0.4231, "step": 2884 }, { "epoch": 9.948275862068966, "grad_norm": 2.1048991680145264, "learning_rate": 1.192551724137931e-05, "loss": 0.5255, "step": 2885 }, { "epoch": 9.951724137931034, "grad_norm": 0.8718719482421875, "learning_rate": 1.1929655172413794e-05, "loss": 0.3632, "step": 2886 }, { "epoch": 9.955172413793104, "grad_norm": 0.7931032776832581, "learning_rate": 1.1933793103448276e-05, "loss": 0.3584, "step": 2887 }, { "epoch": 9.958620689655172, "grad_norm": 1.0599584579467773, "learning_rate": 1.193793103448276e-05, "loss": 0.3386, "step": 2888 }, { "epoch": 9.962068965517242, "grad_norm": 0.9736443758010864, "learning_rate": 1.194206896551724e-05, "loss": 0.3548, "step": 2889 }, { "epoch": 9.96551724137931, "grad_norm": 0.8460452556610107, "learning_rate": 1.1946206896551724e-05, "loss": 0.3524, "step": 2890 }, { "epoch": 9.96896551724138, "grad_norm": 1.0959559679031372, "learning_rate": 1.1950344827586206e-05, "loss": 0.3328, "step": 2891 }, { "epoch": 9.972413793103449, "grad_norm": 2.049031972885132, "learning_rate": 1.195448275862069e-05, "loss": 0.3676, "step": 2892 }, { "epoch": 9.975862068965517, "grad_norm": 1.235803246498108, "learning_rate": 1.1958620689655172e-05, "loss": 0.3438, "step": 2893 }, { "epoch": 9.979310344827587, "grad_norm": 0.9327772855758667, "learning_rate": 1.1962758620689655e-05, "loss": 0.3404, "step": 2894 }, { "epoch": 9.982758620689655, "grad_norm": 1.0790265798568726, "learning_rate": 1.1966896551724137e-05, "loss": 0.3416, "step": 2895 }, { "epoch": 9.986206896551725, "grad_norm": 1.162529706954956, "learning_rate": 1.1971034482758621e-05, "loss": 0.3721, "step": 2896 }, { "epoch": 9.989655172413793, "grad_norm": 1.5093469619750977, "learning_rate": 1.1975172413793105e-05, "loss": 0.3691, "step": 2897 }, { "epoch": 9.993103448275862, "grad_norm": 1.416782021522522, "learning_rate": 1.1979310344827587e-05, "loss": 0.382, "step": 2898 }, { "epoch": 9.99655172413793, "grad_norm": 1.2926690578460693, "learning_rate": 1.198344827586207e-05, "loss": 0.3907, "step": 2899 }, { "epoch": 10.0, "grad_norm": 2.0448784828186035, "learning_rate": 1.1987586206896553e-05, "loss": 0.5434, "step": 2900 }, { "epoch": 10.00344827586207, "grad_norm": 0.8107738494873047, "learning_rate": 1.1991724137931036e-05, "loss": 0.4005, "step": 2901 }, { "epoch": 10.006896551724138, "grad_norm": 1.0432556867599487, "learning_rate": 1.1995862068965517e-05, "loss": 0.3445, "step": 2902 }, { "epoch": 10.010344827586207, "grad_norm": 0.6568751931190491, "learning_rate": 1.2e-05, "loss": 0.3596, "step": 2903 }, { "epoch": 10.013793103448275, "grad_norm": 0.6657074093818665, "learning_rate": 1.2004137931034482e-05, "loss": 0.3327, "step": 2904 }, { "epoch": 10.017241379310345, "grad_norm": 0.9092724323272705, "learning_rate": 1.2008275862068966e-05, "loss": 0.381, "step": 2905 }, { "epoch": 10.020689655172413, "grad_norm": 0.808401882648468, "learning_rate": 1.2012413793103448e-05, "loss": 0.3454, "step": 2906 }, { "epoch": 10.024137931034483, "grad_norm": 0.8618733882904053, "learning_rate": 1.2016551724137932e-05, "loss": 0.3562, "step": 2907 }, { "epoch": 10.027586206896551, "grad_norm": 0.8548541069030762, "learning_rate": 1.2020689655172414e-05, "loss": 0.3477, "step": 2908 }, { "epoch": 10.03103448275862, "grad_norm": 1.3157318830490112, "learning_rate": 1.2024827586206897e-05, "loss": 0.3341, "step": 2909 }, { "epoch": 10.03448275862069, "grad_norm": 2.270007371902466, "learning_rate": 1.202896551724138e-05, "loss": 0.3386, "step": 2910 }, { "epoch": 10.037931034482758, "grad_norm": 1.2528873682022095, "learning_rate": 1.2033103448275863e-05, "loss": 0.3636, "step": 2911 }, { "epoch": 10.041379310344828, "grad_norm": 0.998748779296875, "learning_rate": 1.2037241379310345e-05, "loss": 0.3713, "step": 2912 }, { "epoch": 10.044827586206896, "grad_norm": 0.8917433023452759, "learning_rate": 1.2041379310344829e-05, "loss": 0.3275, "step": 2913 }, { "epoch": 10.048275862068966, "grad_norm": 1.3706871271133423, "learning_rate": 1.2045517241379311e-05, "loss": 0.3221, "step": 2914 }, { "epoch": 10.051724137931034, "grad_norm": 1.4880800247192383, "learning_rate": 1.2049655172413793e-05, "loss": 0.3494, "step": 2915 }, { "epoch": 10.055172413793104, "grad_norm": 1.1962004899978638, "learning_rate": 1.2053793103448275e-05, "loss": 0.3323, "step": 2916 }, { "epoch": 10.058620689655172, "grad_norm": 2.3715648651123047, "learning_rate": 1.2057931034482759e-05, "loss": 0.3422, "step": 2917 }, { "epoch": 10.062068965517241, "grad_norm": 1.0970667600631714, "learning_rate": 1.206206896551724e-05, "loss": 0.3632, "step": 2918 }, { "epoch": 10.065517241379311, "grad_norm": 1.5081065893173218, "learning_rate": 1.2066206896551724e-05, "loss": 0.3354, "step": 2919 }, { "epoch": 10.068965517241379, "grad_norm": 1.9512107372283936, "learning_rate": 1.2070344827586206e-05, "loss": 0.37, "step": 2920 }, { "epoch": 10.072413793103449, "grad_norm": 1.1695666313171387, "learning_rate": 1.207448275862069e-05, "loss": 0.366, "step": 2921 }, { "epoch": 10.075862068965517, "grad_norm": 1.1792480945587158, "learning_rate": 1.2078620689655172e-05, "loss": 0.3527, "step": 2922 }, { "epoch": 10.079310344827586, "grad_norm": 1.243155837059021, "learning_rate": 1.2082758620689656e-05, "loss": 0.394, "step": 2923 }, { "epoch": 10.082758620689654, "grad_norm": 1.533785343170166, "learning_rate": 1.2086896551724138e-05, "loss": 0.3571, "step": 2924 }, { "epoch": 10.086206896551724, "grad_norm": 2.8049957752227783, "learning_rate": 1.2091034482758622e-05, "loss": 0.581, "step": 2925 }, { "epoch": 10.089655172413794, "grad_norm": 0.8971036076545715, "learning_rate": 1.2095172413793104e-05, "loss": 0.3977, "step": 2926 }, { "epoch": 10.093103448275862, "grad_norm": 0.7579912543296814, "learning_rate": 1.2099310344827587e-05, "loss": 0.3324, "step": 2927 }, { "epoch": 10.096551724137932, "grad_norm": 0.9747946858406067, "learning_rate": 1.2103448275862068e-05, "loss": 0.3717, "step": 2928 }, { "epoch": 10.1, "grad_norm": 0.8020246028900146, "learning_rate": 1.2107586206896551e-05, "loss": 0.3596, "step": 2929 }, { "epoch": 10.10344827586207, "grad_norm": 1.0931512117385864, "learning_rate": 1.2111724137931035e-05, "loss": 0.3393, "step": 2930 }, { "epoch": 10.106896551724137, "grad_norm": 0.876821756362915, "learning_rate": 1.2115862068965517e-05, "loss": 0.3192, "step": 2931 }, { "epoch": 10.110344827586207, "grad_norm": 0.8440443873405457, "learning_rate": 1.2120000000000001e-05, "loss": 0.333, "step": 2932 }, { "epoch": 10.113793103448275, "grad_norm": 1.1017647981643677, "learning_rate": 1.2124137931034483e-05, "loss": 0.347, "step": 2933 }, { "epoch": 10.117241379310345, "grad_norm": 1.0651137828826904, "learning_rate": 1.2128275862068967e-05, "loss": 0.3275, "step": 2934 }, { "epoch": 10.120689655172415, "grad_norm": 1.0376938581466675, "learning_rate": 1.2132413793103449e-05, "loss": 0.3622, "step": 2935 }, { "epoch": 10.124137931034483, "grad_norm": 0.9367103576660156, "learning_rate": 1.2136551724137932e-05, "loss": 0.3672, "step": 2936 }, { "epoch": 10.127586206896552, "grad_norm": 0.899938702583313, "learning_rate": 1.2140689655172414e-05, "loss": 0.3553, "step": 2937 }, { "epoch": 10.13103448275862, "grad_norm": 0.9357295632362366, "learning_rate": 1.2144827586206898e-05, "loss": 0.3183, "step": 2938 }, { "epoch": 10.13448275862069, "grad_norm": 1.1052310466766357, "learning_rate": 1.214896551724138e-05, "loss": 0.3829, "step": 2939 }, { "epoch": 10.137931034482758, "grad_norm": 1.4444754123687744, "learning_rate": 1.2153103448275864e-05, "loss": 0.3627, "step": 2940 }, { "epoch": 10.141379310344828, "grad_norm": 5.646381855010986, "learning_rate": 1.2157241379310344e-05, "loss": 0.3301, "step": 2941 }, { "epoch": 10.144827586206896, "grad_norm": 0.9513159394264221, "learning_rate": 1.2161379310344828e-05, "loss": 0.331, "step": 2942 }, { "epoch": 10.148275862068965, "grad_norm": 1.198957085609436, "learning_rate": 1.216551724137931e-05, "loss": 0.3964, "step": 2943 }, { "epoch": 10.151724137931035, "grad_norm": 1.1039267778396606, "learning_rate": 1.2169655172413794e-05, "loss": 0.4145, "step": 2944 }, { "epoch": 10.155172413793103, "grad_norm": 2.0956287384033203, "learning_rate": 1.2173793103448276e-05, "loss": 0.3813, "step": 2945 }, { "epoch": 10.158620689655173, "grad_norm": 1.8842486143112183, "learning_rate": 1.217793103448276e-05, "loss": 0.4076, "step": 2946 }, { "epoch": 10.162068965517241, "grad_norm": 1.7065346240997314, "learning_rate": 1.2182068965517241e-05, "loss": 0.3482, "step": 2947 }, { "epoch": 10.16551724137931, "grad_norm": 2.7681431770324707, "learning_rate": 1.2186206896551725e-05, "loss": 0.3496, "step": 2948 }, { "epoch": 10.168965517241379, "grad_norm": 2.195158004760742, "learning_rate": 1.2190344827586207e-05, "loss": 0.4172, "step": 2949 }, { "epoch": 10.172413793103448, "grad_norm": 2.153038740158081, "learning_rate": 1.219448275862069e-05, "loss": 0.6049, "step": 2950 }, { "epoch": 10.175862068965516, "grad_norm": 0.8206453919410706, "learning_rate": 1.2198620689655173e-05, "loss": 0.3866, "step": 2951 }, { "epoch": 10.179310344827586, "grad_norm": 7.855020999908447, "learning_rate": 1.2202758620689656e-05, "loss": 0.3966, "step": 2952 }, { "epoch": 10.182758620689656, "grad_norm": 0.7872515916824341, "learning_rate": 1.2206896551724138e-05, "loss": 0.3671, "step": 2953 }, { "epoch": 10.186206896551724, "grad_norm": 1.041053295135498, "learning_rate": 1.221103448275862e-05, "loss": 0.3408, "step": 2954 }, { "epoch": 10.189655172413794, "grad_norm": 0.7624310255050659, "learning_rate": 1.2215172413793103e-05, "loss": 0.3508, "step": 2955 }, { "epoch": 10.193103448275862, "grad_norm": 0.8039630651473999, "learning_rate": 1.2219310344827586e-05, "loss": 0.3354, "step": 2956 }, { "epoch": 10.196551724137931, "grad_norm": 0.9940120577812195, "learning_rate": 1.2223448275862068e-05, "loss": 0.3415, "step": 2957 }, { "epoch": 10.2, "grad_norm": 1.0908204317092896, "learning_rate": 1.2227586206896552e-05, "loss": 0.3409, "step": 2958 }, { "epoch": 10.203448275862069, "grad_norm": 0.870954155921936, "learning_rate": 1.2231724137931034e-05, "loss": 0.3295, "step": 2959 }, { "epoch": 10.206896551724139, "grad_norm": 0.9652336835861206, "learning_rate": 1.2235862068965518e-05, "loss": 0.3418, "step": 2960 }, { "epoch": 10.210344827586207, "grad_norm": 0.8589680790901184, "learning_rate": 1.224e-05, "loss": 0.3506, "step": 2961 }, { "epoch": 10.213793103448277, "grad_norm": 0.850662112236023, "learning_rate": 1.2244137931034483e-05, "loss": 0.3496, "step": 2962 }, { "epoch": 10.217241379310344, "grad_norm": 1.1797642707824707, "learning_rate": 1.2248275862068967e-05, "loss": 0.347, "step": 2963 }, { "epoch": 10.220689655172414, "grad_norm": 0.9864853620529175, "learning_rate": 1.2252413793103449e-05, "loss": 0.3422, "step": 2964 }, { "epoch": 10.224137931034482, "grad_norm": 1.0418435335159302, "learning_rate": 1.2256551724137933e-05, "loss": 0.3643, "step": 2965 }, { "epoch": 10.227586206896552, "grad_norm": 1.0389777421951294, "learning_rate": 1.2260689655172413e-05, "loss": 0.318, "step": 2966 }, { "epoch": 10.23103448275862, "grad_norm": 1.0164437294006348, "learning_rate": 1.2264827586206897e-05, "loss": 0.346, "step": 2967 }, { "epoch": 10.23448275862069, "grad_norm": 1.4530915021896362, "learning_rate": 1.2268965517241379e-05, "loss": 0.3247, "step": 2968 }, { "epoch": 10.23793103448276, "grad_norm": 1.4298197031021118, "learning_rate": 1.2273103448275863e-05, "loss": 0.3342, "step": 2969 }, { "epoch": 10.241379310344827, "grad_norm": 2.182936906814575, "learning_rate": 1.2277241379310345e-05, "loss": 0.3541, "step": 2970 }, { "epoch": 10.244827586206897, "grad_norm": 1.9872980117797852, "learning_rate": 1.2281379310344828e-05, "loss": 0.4088, "step": 2971 }, { "epoch": 10.248275862068965, "grad_norm": 1.3936094045639038, "learning_rate": 1.228551724137931e-05, "loss": 0.3983, "step": 2972 }, { "epoch": 10.251724137931035, "grad_norm": 1.409278392791748, "learning_rate": 1.2289655172413794e-05, "loss": 0.4064, "step": 2973 }, { "epoch": 10.255172413793103, "grad_norm": 1.9352229833602905, "learning_rate": 1.2293793103448276e-05, "loss": 0.3916, "step": 2974 }, { "epoch": 10.258620689655173, "grad_norm": 2.2104909420013428, "learning_rate": 1.229793103448276e-05, "loss": 0.4871, "step": 2975 }, { "epoch": 10.26206896551724, "grad_norm": 0.9835953712463379, "learning_rate": 1.2302068965517242e-05, "loss": 0.4273, "step": 2976 }, { "epoch": 10.26551724137931, "grad_norm": 3.6104230880737305, "learning_rate": 1.2306206896551726e-05, "loss": 0.3594, "step": 2977 }, { "epoch": 10.26896551724138, "grad_norm": 1.2647032737731934, "learning_rate": 1.2310344827586208e-05, "loss": 0.3318, "step": 2978 }, { "epoch": 10.272413793103448, "grad_norm": 0.7517483830451965, "learning_rate": 1.231448275862069e-05, "loss": 0.3573, "step": 2979 }, { "epoch": 10.275862068965518, "grad_norm": 0.8137362599372864, "learning_rate": 1.2318620689655172e-05, "loss": 0.3344, "step": 2980 }, { "epoch": 10.279310344827586, "grad_norm": 0.8561412692070007, "learning_rate": 1.2322758620689655e-05, "loss": 0.3149, "step": 2981 }, { "epoch": 10.282758620689656, "grad_norm": 2.413445472717285, "learning_rate": 1.2326896551724137e-05, "loss": 0.3674, "step": 2982 }, { "epoch": 10.286206896551723, "grad_norm": 0.9062603712081909, "learning_rate": 1.2331034482758621e-05, "loss": 0.3661, "step": 2983 }, { "epoch": 10.289655172413793, "grad_norm": 0.8598234057426453, "learning_rate": 1.2335172413793103e-05, "loss": 0.3184, "step": 2984 }, { "epoch": 10.293103448275861, "grad_norm": 1.0901018381118774, "learning_rate": 1.2339310344827587e-05, "loss": 0.3332, "step": 2985 }, { "epoch": 10.296551724137931, "grad_norm": 0.8799340724945068, "learning_rate": 1.2343448275862069e-05, "loss": 0.3227, "step": 2986 }, { "epoch": 10.3, "grad_norm": 0.8692594766616821, "learning_rate": 1.2347586206896552e-05, "loss": 0.3655, "step": 2987 }, { "epoch": 10.303448275862069, "grad_norm": 0.8555806875228882, "learning_rate": 1.2351724137931035e-05, "loss": 0.3434, "step": 2988 }, { "epoch": 10.306896551724138, "grad_norm": 0.9457889795303345, "learning_rate": 1.2355862068965518e-05, "loss": 0.3548, "step": 2989 }, { "epoch": 10.310344827586206, "grad_norm": 2.71923565864563, "learning_rate": 1.236e-05, "loss": 0.3502, "step": 2990 }, { "epoch": 10.313793103448276, "grad_norm": 1.0124144554138184, "learning_rate": 1.2364137931034484e-05, "loss": 0.3917, "step": 2991 }, { "epoch": 10.317241379310344, "grad_norm": 0.8837091326713562, "learning_rate": 1.2368275862068964e-05, "loss": 0.3375, "step": 2992 }, { "epoch": 10.320689655172414, "grad_norm": 1.2728641033172607, "learning_rate": 1.2372413793103448e-05, "loss": 0.3325, "step": 2993 }, { "epoch": 10.324137931034482, "grad_norm": 1.037803292274475, "learning_rate": 1.237655172413793e-05, "loss": 0.3276, "step": 2994 }, { "epoch": 10.327586206896552, "grad_norm": 0.9876134395599365, "learning_rate": 1.2380689655172414e-05, "loss": 0.3349, "step": 2995 }, { "epoch": 10.331034482758621, "grad_norm": 1.5261738300323486, "learning_rate": 1.2384827586206896e-05, "loss": 0.3636, "step": 2996 }, { "epoch": 10.33448275862069, "grad_norm": 1.9936943054199219, "learning_rate": 1.238896551724138e-05, "loss": 0.3993, "step": 2997 }, { "epoch": 10.337931034482759, "grad_norm": 1.2832671403884888, "learning_rate": 1.2393103448275863e-05, "loss": 0.419, "step": 2998 }, { "epoch": 10.341379310344827, "grad_norm": 1.4842569828033447, "learning_rate": 1.2397241379310345e-05, "loss": 0.4591, "step": 2999 }, { "epoch": 10.344827586206897, "grad_norm": 1.8626819849014282, "learning_rate": 1.2401379310344829e-05, "loss": 0.5526, "step": 3000 }, { "epoch": 10.344827586206897, "eval_cer": 0.14606046547591434, "eval_loss": 0.36939913034439087, "eval_runtime": 18.1383, "eval_samples_per_second": 51.107, "eval_steps_per_second": 0.165, "eval_wer": 0.3530020703933747, "step": 3000 }, { "epoch": 10.348275862068965, "grad_norm": 1.063527226448059, "learning_rate": 1.2405517241379311e-05, "loss": 0.384, "step": 3001 }, { "epoch": 10.351724137931035, "grad_norm": 0.8275315761566162, "learning_rate": 1.2409655172413795e-05, "loss": 0.3817, "step": 3002 }, { "epoch": 10.355172413793104, "grad_norm": 0.7657791376113892, "learning_rate": 1.2413793103448277e-05, "loss": 0.3881, "step": 3003 }, { "epoch": 10.358620689655172, "grad_norm": 0.7694234251976013, "learning_rate": 1.241793103448276e-05, "loss": 0.3518, "step": 3004 }, { "epoch": 10.362068965517242, "grad_norm": 0.9225375652313232, "learning_rate": 1.242206896551724e-05, "loss": 0.3476, "step": 3005 }, { "epoch": 10.36551724137931, "grad_norm": 0.7016845941543579, "learning_rate": 1.2426206896551724e-05, "loss": 0.3425, "step": 3006 }, { "epoch": 10.36896551724138, "grad_norm": 0.9881346225738525, "learning_rate": 1.2430344827586206e-05, "loss": 0.3462, "step": 3007 }, { "epoch": 10.372413793103448, "grad_norm": 1.0189203023910522, "learning_rate": 1.243448275862069e-05, "loss": 0.333, "step": 3008 }, { "epoch": 10.375862068965517, "grad_norm": 0.9551504254341125, "learning_rate": 1.2438620689655172e-05, "loss": 0.357, "step": 3009 }, { "epoch": 10.379310344827585, "grad_norm": 0.8299809694290161, "learning_rate": 1.2442758620689656e-05, "loss": 0.3261, "step": 3010 }, { "epoch": 10.382758620689655, "grad_norm": 0.8699582815170288, "learning_rate": 1.2446896551724138e-05, "loss": 0.3457, "step": 3011 }, { "epoch": 10.386206896551725, "grad_norm": 1.1416590213775635, "learning_rate": 1.2451034482758622e-05, "loss": 0.349, "step": 3012 }, { "epoch": 10.389655172413793, "grad_norm": 1.0935887098312378, "learning_rate": 1.2455172413793104e-05, "loss": 0.3299, "step": 3013 }, { "epoch": 10.393103448275863, "grad_norm": 1.3229739665985107, "learning_rate": 1.2459310344827587e-05, "loss": 0.3487, "step": 3014 }, { "epoch": 10.39655172413793, "grad_norm": 1.0944656133651733, "learning_rate": 1.246344827586207e-05, "loss": 0.3566, "step": 3015 }, { "epoch": 10.4, "grad_norm": 1.0332319736480713, "learning_rate": 1.2467586206896553e-05, "loss": 0.3273, "step": 3016 }, { "epoch": 10.403448275862068, "grad_norm": 3.1210403442382812, "learning_rate": 1.2471724137931035e-05, "loss": 0.3589, "step": 3017 }, { "epoch": 10.406896551724138, "grad_norm": 1.2755279541015625, "learning_rate": 1.2475862068965517e-05, "loss": 0.3273, "step": 3018 }, { "epoch": 10.410344827586206, "grad_norm": 1.100537896156311, "learning_rate": 1.2479999999999999e-05, "loss": 0.3647, "step": 3019 }, { "epoch": 10.413793103448276, "grad_norm": 1.6455917358398438, "learning_rate": 1.2484137931034483e-05, "loss": 0.3361, "step": 3020 }, { "epoch": 10.417241379310346, "grad_norm": 5.992008686065674, "learning_rate": 1.2488275862068965e-05, "loss": 0.3583, "step": 3021 }, { "epoch": 10.420689655172414, "grad_norm": 1.3975757360458374, "learning_rate": 1.2492413793103449e-05, "loss": 0.3491, "step": 3022 }, { "epoch": 10.424137931034483, "grad_norm": 1.2348995208740234, "learning_rate": 1.249655172413793e-05, "loss": 0.36, "step": 3023 }, { "epoch": 10.427586206896551, "grad_norm": 1.903845191001892, "learning_rate": 1.2500689655172414e-05, "loss": 0.4878, "step": 3024 }, { "epoch": 10.431034482758621, "grad_norm": 1.9608074426651, "learning_rate": 1.2504827586206896e-05, "loss": 0.5358, "step": 3025 }, { "epoch": 10.434482758620689, "grad_norm": 0.9331625699996948, "learning_rate": 1.250896551724138e-05, "loss": 0.4015, "step": 3026 }, { "epoch": 10.437931034482759, "grad_norm": 0.7488003373146057, "learning_rate": 1.2513103448275862e-05, "loss": 0.3565, "step": 3027 }, { "epoch": 10.441379310344828, "grad_norm": 0.8678735494613647, "learning_rate": 1.2517241379310346e-05, "loss": 0.3691, "step": 3028 }, { "epoch": 10.444827586206896, "grad_norm": 0.7875158190727234, "learning_rate": 1.2521379310344828e-05, "loss": 0.3248, "step": 3029 }, { "epoch": 10.448275862068966, "grad_norm": 0.7564863562583923, "learning_rate": 1.2525517241379311e-05, "loss": 0.346, "step": 3030 }, { "epoch": 10.451724137931034, "grad_norm": 0.8584641814231873, "learning_rate": 1.2529655172413793e-05, "loss": 0.3382, "step": 3031 }, { "epoch": 10.455172413793104, "grad_norm": 0.982235312461853, "learning_rate": 1.2533793103448275e-05, "loss": 0.3484, "step": 3032 }, { "epoch": 10.458620689655172, "grad_norm": 0.9126214981079102, "learning_rate": 1.253793103448276e-05, "loss": 0.4054, "step": 3033 }, { "epoch": 10.462068965517242, "grad_norm": 0.9189257025718689, "learning_rate": 1.2542068965517241e-05, "loss": 0.3177, "step": 3034 }, { "epoch": 10.46551724137931, "grad_norm": 0.9287375211715698, "learning_rate": 1.2546206896551725e-05, "loss": 0.3512, "step": 3035 }, { "epoch": 10.46896551724138, "grad_norm": 1.1162084341049194, "learning_rate": 1.2550344827586207e-05, "loss": 0.3336, "step": 3036 }, { "epoch": 10.472413793103449, "grad_norm": 0.9215264320373535, "learning_rate": 1.255448275862069e-05, "loss": 0.3393, "step": 3037 }, { "epoch": 10.475862068965517, "grad_norm": 0.9112765789031982, "learning_rate": 1.2558620689655173e-05, "loss": 0.3379, "step": 3038 }, { "epoch": 10.479310344827587, "grad_norm": 1.0274611711502075, "learning_rate": 1.2562758620689656e-05, "loss": 0.3225, "step": 3039 }, { "epoch": 10.482758620689655, "grad_norm": 1.023264765739441, "learning_rate": 1.2566896551724138e-05, "loss": 0.374, "step": 3040 }, { "epoch": 10.486206896551725, "grad_norm": 1.1568139791488647, "learning_rate": 1.2571034482758622e-05, "loss": 0.3392, "step": 3041 }, { "epoch": 10.489655172413793, "grad_norm": 1.1709845066070557, "learning_rate": 1.2575172413793104e-05, "loss": 0.3116, "step": 3042 }, { "epoch": 10.493103448275862, "grad_norm": 0.8919686079025269, "learning_rate": 1.2579310344827588e-05, "loss": 0.3425, "step": 3043 }, { "epoch": 10.49655172413793, "grad_norm": 2.0534188747406006, "learning_rate": 1.2583448275862068e-05, "loss": 0.3867, "step": 3044 }, { "epoch": 10.5, "grad_norm": 1.1495596170425415, "learning_rate": 1.2587586206896552e-05, "loss": 0.3542, "step": 3045 }, { "epoch": 10.50344827586207, "grad_norm": 1.516329288482666, "learning_rate": 1.2591724137931034e-05, "loss": 0.3961, "step": 3046 }, { "epoch": 10.506896551724138, "grad_norm": 1.2722088098526, "learning_rate": 1.2595862068965518e-05, "loss": 0.3253, "step": 3047 }, { "epoch": 10.510344827586207, "grad_norm": 1.6014065742492676, "learning_rate": 1.26e-05, "loss": 0.3925, "step": 3048 }, { "epoch": 10.513793103448275, "grad_norm": 2.0930440425872803, "learning_rate": 1.2604137931034483e-05, "loss": 0.4176, "step": 3049 }, { "epoch": 10.517241379310345, "grad_norm": 1.6994346380233765, "learning_rate": 1.2608275862068965e-05, "loss": 0.5186, "step": 3050 }, { "epoch": 10.520689655172413, "grad_norm": 0.9614307284355164, "learning_rate": 1.2612413793103449e-05, "loss": 0.4035, "step": 3051 }, { "epoch": 10.524137931034483, "grad_norm": 0.7901360988616943, "learning_rate": 1.2616551724137931e-05, "loss": 0.3658, "step": 3052 }, { "epoch": 10.527586206896551, "grad_norm": 0.7953246235847473, "learning_rate": 1.2620689655172415e-05, "loss": 0.3231, "step": 3053 }, { "epoch": 10.53103448275862, "grad_norm": 1.1542376279830933, "learning_rate": 1.2624827586206897e-05, "loss": 0.3516, "step": 3054 }, { "epoch": 10.53448275862069, "grad_norm": 0.8073504567146301, "learning_rate": 1.262896551724138e-05, "loss": 0.336, "step": 3055 }, { "epoch": 10.537931034482758, "grad_norm": 1.2712332010269165, "learning_rate": 1.2633103448275863e-05, "loss": 0.3565, "step": 3056 }, { "epoch": 10.541379310344828, "grad_norm": 1.1251211166381836, "learning_rate": 1.2637241379310345e-05, "loss": 0.3656, "step": 3057 }, { "epoch": 10.544827586206896, "grad_norm": 1.208378553390503, "learning_rate": 1.2641379310344827e-05, "loss": 0.364, "step": 3058 }, { "epoch": 10.548275862068966, "grad_norm": 0.9432241320610046, "learning_rate": 1.264551724137931e-05, "loss": 0.3444, "step": 3059 }, { "epoch": 10.551724137931034, "grad_norm": 0.9642232656478882, "learning_rate": 1.2649655172413792e-05, "loss": 0.3173, "step": 3060 }, { "epoch": 10.555172413793104, "grad_norm": 1.1251659393310547, "learning_rate": 1.2653793103448276e-05, "loss": 0.3765, "step": 3061 }, { "epoch": 10.558620689655172, "grad_norm": 0.863918662071228, "learning_rate": 1.2657931034482758e-05, "loss": 0.3515, "step": 3062 }, { "epoch": 10.562068965517241, "grad_norm": 1.0854403972625732, "learning_rate": 1.2662068965517242e-05, "loss": 0.3328, "step": 3063 }, { "epoch": 10.565517241379311, "grad_norm": 0.9691739678382874, "learning_rate": 1.2666206896551725e-05, "loss": 0.362, "step": 3064 }, { "epoch": 10.568965517241379, "grad_norm": 1.1750404834747314, "learning_rate": 1.2670344827586208e-05, "loss": 0.3351, "step": 3065 }, { "epoch": 10.572413793103449, "grad_norm": 1.9112504720687866, "learning_rate": 1.2674482758620691e-05, "loss": 0.3811, "step": 3066 }, { "epoch": 10.575862068965517, "grad_norm": 1.0969105958938599, "learning_rate": 1.2678620689655173e-05, "loss": 0.3611, "step": 3067 }, { "epoch": 10.579310344827586, "grad_norm": 2.1956069469451904, "learning_rate": 1.2682758620689657e-05, "loss": 0.3512, "step": 3068 }, { "epoch": 10.582758620689654, "grad_norm": 1.024399995803833, "learning_rate": 1.2686896551724137e-05, "loss": 0.3474, "step": 3069 }, { "epoch": 10.586206896551724, "grad_norm": 1.7316800355911255, "learning_rate": 1.2691034482758621e-05, "loss": 0.3405, "step": 3070 }, { "epoch": 10.589655172413792, "grad_norm": 1.2432386875152588, "learning_rate": 1.2695172413793103e-05, "loss": 0.3214, "step": 3071 }, { "epoch": 10.593103448275862, "grad_norm": 1.3976976871490479, "learning_rate": 1.2699310344827587e-05, "loss": 0.3659, "step": 3072 }, { "epoch": 10.596551724137932, "grad_norm": 1.311755657196045, "learning_rate": 1.2703448275862069e-05, "loss": 0.3978, "step": 3073 }, { "epoch": 10.6, "grad_norm": 1.0688109397888184, "learning_rate": 1.2707586206896552e-05, "loss": 0.4332, "step": 3074 }, { "epoch": 10.60344827586207, "grad_norm": 6.986397743225098, "learning_rate": 1.2711724137931034e-05, "loss": 0.5197, "step": 3075 }, { "epoch": 10.606896551724137, "grad_norm": 1.4024488925933838, "learning_rate": 1.2715862068965518e-05, "loss": 0.3752, "step": 3076 }, { "epoch": 10.610344827586207, "grad_norm": 0.9769368767738342, "learning_rate": 1.272e-05, "loss": 0.3672, "step": 3077 }, { "epoch": 10.613793103448275, "grad_norm": 0.7840715050697327, "learning_rate": 1.2724137931034484e-05, "loss": 0.3539, "step": 3078 }, { "epoch": 10.617241379310345, "grad_norm": 0.9106207489967346, "learning_rate": 1.2728275862068966e-05, "loss": 0.3559, "step": 3079 }, { "epoch": 10.620689655172415, "grad_norm": 1.087127923965454, "learning_rate": 1.273241379310345e-05, "loss": 0.3383, "step": 3080 }, { "epoch": 10.624137931034483, "grad_norm": 0.7717306613922119, "learning_rate": 1.2736551724137932e-05, "loss": 0.3203, "step": 3081 }, { "epoch": 10.627586206896552, "grad_norm": 1.0862280130386353, "learning_rate": 1.2740689655172414e-05, "loss": 0.3682, "step": 3082 }, { "epoch": 10.63103448275862, "grad_norm": 0.8967858552932739, "learning_rate": 1.2744827586206896e-05, "loss": 0.3385, "step": 3083 }, { "epoch": 10.63448275862069, "grad_norm": 1.045687198638916, "learning_rate": 1.274896551724138e-05, "loss": 0.3297, "step": 3084 }, { "epoch": 10.637931034482758, "grad_norm": 1.097731590270996, "learning_rate": 1.2753103448275861e-05, "loss": 0.3396, "step": 3085 }, { "epoch": 10.641379310344828, "grad_norm": 0.942851185798645, "learning_rate": 1.2757241379310345e-05, "loss": 0.3647, "step": 3086 }, { "epoch": 10.644827586206896, "grad_norm": 1.129888892173767, "learning_rate": 1.2761379310344827e-05, "loss": 0.3787, "step": 3087 }, { "epoch": 10.648275862068965, "grad_norm": 0.8811078071594238, "learning_rate": 1.2765517241379311e-05, "loss": 0.3551, "step": 3088 }, { "epoch": 10.651724137931035, "grad_norm": 2.0879499912261963, "learning_rate": 1.2769655172413793e-05, "loss": 0.3226, "step": 3089 }, { "epoch": 10.655172413793103, "grad_norm": 1.2677329778671265, "learning_rate": 1.2773793103448277e-05, "loss": 0.3381, "step": 3090 }, { "epoch": 10.658620689655173, "grad_norm": 0.9925844669342041, "learning_rate": 1.2777931034482759e-05, "loss": 0.3467, "step": 3091 }, { "epoch": 10.662068965517241, "grad_norm": 1.0150421857833862, "learning_rate": 1.2782068965517242e-05, "loss": 0.3265, "step": 3092 }, { "epoch": 10.66551724137931, "grad_norm": 1.291105031967163, "learning_rate": 1.2786206896551724e-05, "loss": 0.377, "step": 3093 }, { "epoch": 10.668965517241379, "grad_norm": 1.2994647026062012, "learning_rate": 1.2790344827586208e-05, "loss": 0.3448, "step": 3094 }, { "epoch": 10.672413793103448, "grad_norm": 1.968620777130127, "learning_rate": 1.2794482758620688e-05, "loss": 0.3644, "step": 3095 }, { "epoch": 10.675862068965518, "grad_norm": 1.7972931861877441, "learning_rate": 1.2798620689655172e-05, "loss": 0.3872, "step": 3096 }, { "epoch": 10.679310344827586, "grad_norm": 5.126410484313965, "learning_rate": 1.2802758620689656e-05, "loss": 0.3649, "step": 3097 }, { "epoch": 10.682758620689656, "grad_norm": 1.3968881368637085, "learning_rate": 1.2806896551724138e-05, "loss": 0.4517, "step": 3098 }, { "epoch": 10.686206896551724, "grad_norm": 1.505624532699585, "learning_rate": 1.2811034482758622e-05, "loss": 0.3781, "step": 3099 }, { "epoch": 10.689655172413794, "grad_norm": 1.7942618131637573, "learning_rate": 1.2815172413793104e-05, "loss": 0.4634, "step": 3100 }, { "epoch": 10.693103448275862, "grad_norm": 0.8639079332351685, "learning_rate": 1.2819310344827587e-05, "loss": 0.3881, "step": 3101 }, { "epoch": 10.696551724137931, "grad_norm": 0.7489119172096252, "learning_rate": 1.282344827586207e-05, "loss": 0.3377, "step": 3102 }, { "epoch": 10.7, "grad_norm": 1.2114288806915283, "learning_rate": 1.2827586206896553e-05, "loss": 0.336, "step": 3103 }, { "epoch": 10.703448275862069, "grad_norm": 1.1149282455444336, "learning_rate": 1.2831724137931035e-05, "loss": 0.3876, "step": 3104 }, { "epoch": 10.706896551724139, "grad_norm": 1.2132489681243896, "learning_rate": 1.2835862068965519e-05, "loss": 0.3518, "step": 3105 }, { "epoch": 10.710344827586207, "grad_norm": 1.190088152885437, "learning_rate": 1.284e-05, "loss": 0.32, "step": 3106 }, { "epoch": 10.713793103448277, "grad_norm": 1.3162389993667603, "learning_rate": 1.2844137931034484e-05, "loss": 0.3646, "step": 3107 }, { "epoch": 10.717241379310344, "grad_norm": 0.9822313785552979, "learning_rate": 1.2848275862068965e-05, "loss": 0.3133, "step": 3108 }, { "epoch": 10.720689655172414, "grad_norm": 1.1374658346176147, "learning_rate": 1.2852413793103448e-05, "loss": 0.3235, "step": 3109 }, { "epoch": 10.724137931034482, "grad_norm": 0.9943152070045471, "learning_rate": 1.285655172413793e-05, "loss": 0.3594, "step": 3110 }, { "epoch": 10.727586206896552, "grad_norm": 1.295346736907959, "learning_rate": 1.2860689655172414e-05, "loss": 0.385, "step": 3111 }, { "epoch": 10.73103448275862, "grad_norm": 1.0803006887435913, "learning_rate": 1.2864827586206896e-05, "loss": 0.3335, "step": 3112 }, { "epoch": 10.73448275862069, "grad_norm": 1.05144202709198, "learning_rate": 1.286896551724138e-05, "loss": 0.3669, "step": 3113 }, { "epoch": 10.73793103448276, "grad_norm": 1.064429521560669, "learning_rate": 1.2873103448275862e-05, "loss": 0.335, "step": 3114 }, { "epoch": 10.741379310344827, "grad_norm": 1.0377814769744873, "learning_rate": 1.2877241379310346e-05, "loss": 0.4082, "step": 3115 }, { "epoch": 10.744827586206897, "grad_norm": 1.5023515224456787, "learning_rate": 1.2881379310344828e-05, "loss": 0.369, "step": 3116 }, { "epoch": 10.748275862068965, "grad_norm": 0.9652921557426453, "learning_rate": 1.2885517241379311e-05, "loss": 0.337, "step": 3117 }, { "epoch": 10.751724137931035, "grad_norm": 1.3072365522384644, "learning_rate": 1.2889655172413793e-05, "loss": 0.3554, "step": 3118 }, { "epoch": 10.755172413793103, "grad_norm": 1.4849977493286133, "learning_rate": 1.2893793103448277e-05, "loss": 0.3649, "step": 3119 }, { "epoch": 10.758620689655173, "grad_norm": 1.6259872913360596, "learning_rate": 1.2897931034482759e-05, "loss": 0.3749, "step": 3120 }, { "epoch": 10.76206896551724, "grad_norm": 1.6182194948196411, "learning_rate": 1.2902068965517241e-05, "loss": 0.376, "step": 3121 }, { "epoch": 10.76551724137931, "grad_norm": 1.1352097988128662, "learning_rate": 1.2906206896551723e-05, "loss": 0.3529, "step": 3122 }, { "epoch": 10.76896551724138, "grad_norm": 1.9019579887390137, "learning_rate": 1.2910344827586207e-05, "loss": 0.3738, "step": 3123 }, { "epoch": 10.772413793103448, "grad_norm": 1.5972808599472046, "learning_rate": 1.2914482758620689e-05, "loss": 0.3841, "step": 3124 }, { "epoch": 10.775862068965518, "grad_norm": 2.574253559112549, "learning_rate": 1.2918620689655173e-05, "loss": 0.4809, "step": 3125 }, { "epoch": 10.779310344827586, "grad_norm": 0.9635357856750488, "learning_rate": 1.2922758620689655e-05, "loss": 0.4018, "step": 3126 }, { "epoch": 10.782758620689656, "grad_norm": 1.2933486700057983, "learning_rate": 1.2926896551724138e-05, "loss": 0.3377, "step": 3127 }, { "epoch": 10.786206896551723, "grad_norm": 0.9407263994216919, "learning_rate": 1.293103448275862e-05, "loss": 0.3842, "step": 3128 }, { "epoch": 10.789655172413793, "grad_norm": 1.052274227142334, "learning_rate": 1.2935172413793104e-05, "loss": 0.3913, "step": 3129 }, { "epoch": 10.793103448275861, "grad_norm": 1.263647198677063, "learning_rate": 1.2939310344827588e-05, "loss": 0.3609, "step": 3130 }, { "epoch": 10.796551724137931, "grad_norm": 1.1197797060012817, "learning_rate": 1.294344827586207e-05, "loss": 0.3537, "step": 3131 }, { "epoch": 10.8, "grad_norm": 1.0810383558273315, "learning_rate": 1.2947586206896554e-05, "loss": 0.3406, "step": 3132 }, { "epoch": 10.803448275862069, "grad_norm": 0.9431266784667969, "learning_rate": 1.2951724137931036e-05, "loss": 0.3576, "step": 3133 }, { "epoch": 10.806896551724138, "grad_norm": 0.8949751257896423, "learning_rate": 1.2955862068965518e-05, "loss": 0.3519, "step": 3134 }, { "epoch": 10.810344827586206, "grad_norm": 2.422870635986328, "learning_rate": 1.296e-05, "loss": 0.3252, "step": 3135 }, { "epoch": 10.813793103448276, "grad_norm": 1.069590449333191, "learning_rate": 1.2964137931034483e-05, "loss": 0.3327, "step": 3136 }, { "epoch": 10.817241379310344, "grad_norm": 0.9802272319793701, "learning_rate": 1.2968275862068965e-05, "loss": 0.3478, "step": 3137 }, { "epoch": 10.820689655172414, "grad_norm": 1.199014663696289, "learning_rate": 1.2972413793103449e-05, "loss": 0.3323, "step": 3138 }, { "epoch": 10.824137931034482, "grad_norm": 1.0001989603042603, "learning_rate": 1.2976551724137931e-05, "loss": 0.3362, "step": 3139 }, { "epoch": 10.827586206896552, "grad_norm": 1.0699182748794556, "learning_rate": 1.2980689655172415e-05, "loss": 0.3397, "step": 3140 }, { "epoch": 10.831034482758621, "grad_norm": 0.9472194314002991, "learning_rate": 1.2984827586206897e-05, "loss": 0.3161, "step": 3141 }, { "epoch": 10.83448275862069, "grad_norm": 0.9342082142829895, "learning_rate": 1.298896551724138e-05, "loss": 0.3835, "step": 3142 }, { "epoch": 10.837931034482759, "grad_norm": 1.4122233390808105, "learning_rate": 1.2993103448275863e-05, "loss": 0.3436, "step": 3143 }, { "epoch": 10.841379310344827, "grad_norm": 1.263406753540039, "learning_rate": 1.2997241379310346e-05, "loss": 0.3608, "step": 3144 }, { "epoch": 10.844827586206897, "grad_norm": 1.4761050939559937, "learning_rate": 1.3001379310344828e-05, "loss": 0.3638, "step": 3145 }, { "epoch": 10.848275862068965, "grad_norm": 1.6856365203857422, "learning_rate": 1.300551724137931e-05, "loss": 0.4037, "step": 3146 }, { "epoch": 10.851724137931035, "grad_norm": 1.3372983932495117, "learning_rate": 1.3009655172413792e-05, "loss": 0.3634, "step": 3147 }, { "epoch": 10.855172413793104, "grad_norm": 1.56076979637146, "learning_rate": 1.3013793103448276e-05, "loss": 0.3736, "step": 3148 }, { "epoch": 10.858620689655172, "grad_norm": 1.3733294010162354, "learning_rate": 1.3017931034482758e-05, "loss": 0.377, "step": 3149 }, { "epoch": 10.862068965517242, "grad_norm": 2.305738687515259, "learning_rate": 1.3022068965517242e-05, "loss": 0.5079, "step": 3150 }, { "epoch": 10.86551724137931, "grad_norm": 0.8284775614738464, "learning_rate": 1.3026206896551724e-05, "loss": 0.3892, "step": 3151 }, { "epoch": 10.86896551724138, "grad_norm": 0.7142167091369629, "learning_rate": 1.3030344827586207e-05, "loss": 0.3703, "step": 3152 }, { "epoch": 10.872413793103448, "grad_norm": 0.8187963962554932, "learning_rate": 1.303448275862069e-05, "loss": 0.3377, "step": 3153 }, { "epoch": 10.875862068965517, "grad_norm": 0.8610280752182007, "learning_rate": 1.3038620689655173e-05, "loss": 0.3893, "step": 3154 }, { "epoch": 10.879310344827585, "grad_norm": 1.02534019947052, "learning_rate": 1.3042758620689655e-05, "loss": 0.3441, "step": 3155 }, { "epoch": 10.882758620689655, "grad_norm": 0.876893162727356, "learning_rate": 1.3046896551724139e-05, "loss": 0.3672, "step": 3156 }, { "epoch": 10.886206896551725, "grad_norm": 0.730902373790741, "learning_rate": 1.3051034482758621e-05, "loss": 0.372, "step": 3157 }, { "epoch": 10.889655172413793, "grad_norm": 0.7273479104042053, "learning_rate": 1.3055172413793105e-05, "loss": 0.3582, "step": 3158 }, { "epoch": 10.893103448275863, "grad_norm": 0.8174223899841309, "learning_rate": 1.3059310344827585e-05, "loss": 0.338, "step": 3159 }, { "epoch": 10.89655172413793, "grad_norm": 0.8375733494758606, "learning_rate": 1.3063448275862069e-05, "loss": 0.3229, "step": 3160 }, { "epoch": 10.9, "grad_norm": 2.5810353755950928, "learning_rate": 1.306758620689655e-05, "loss": 0.3509, "step": 3161 }, { "epoch": 10.903448275862068, "grad_norm": 1.0991883277893066, "learning_rate": 1.3071724137931034e-05, "loss": 0.3711, "step": 3162 }, { "epoch": 10.906896551724138, "grad_norm": 0.9382200241088867, "learning_rate": 1.3075862068965518e-05, "loss": 0.3287, "step": 3163 }, { "epoch": 10.910344827586208, "grad_norm": 1.1427806615829468, "learning_rate": 1.308e-05, "loss": 0.3431, "step": 3164 }, { "epoch": 10.913793103448276, "grad_norm": 1.0378915071487427, "learning_rate": 1.3084137931034484e-05, "loss": 0.382, "step": 3165 }, { "epoch": 10.917241379310346, "grad_norm": 1.0369219779968262, "learning_rate": 1.3088275862068966e-05, "loss": 0.355, "step": 3166 }, { "epoch": 10.920689655172414, "grad_norm": 1.4192836284637451, "learning_rate": 1.309241379310345e-05, "loss": 0.3651, "step": 3167 }, { "epoch": 10.924137931034483, "grad_norm": 0.9570999145507812, "learning_rate": 1.3096551724137932e-05, "loss": 0.3217, "step": 3168 }, { "epoch": 10.927586206896551, "grad_norm": 1.6069759130477905, "learning_rate": 1.3100689655172415e-05, "loss": 0.3419, "step": 3169 }, { "epoch": 10.931034482758621, "grad_norm": 1.085639476776123, "learning_rate": 1.3104827586206897e-05, "loss": 0.3659, "step": 3170 }, { "epoch": 10.934482758620689, "grad_norm": 2.3557772636413574, "learning_rate": 1.3108965517241381e-05, "loss": 0.3577, "step": 3171 }, { "epoch": 10.937931034482759, "grad_norm": 0.9392257332801819, "learning_rate": 1.3113103448275861e-05, "loss": 0.3467, "step": 3172 }, { "epoch": 10.941379310344828, "grad_norm": 2.366286516189575, "learning_rate": 1.3117241379310345e-05, "loss": 0.3239, "step": 3173 }, { "epoch": 10.944827586206896, "grad_norm": 1.5177277326583862, "learning_rate": 1.3121379310344827e-05, "loss": 0.4257, "step": 3174 }, { "epoch": 10.948275862068966, "grad_norm": 2.6561145782470703, "learning_rate": 1.312551724137931e-05, "loss": 0.5061, "step": 3175 }, { "epoch": 10.951724137931034, "grad_norm": 1.0555201768875122, "learning_rate": 1.3129655172413793e-05, "loss": 0.3919, "step": 3176 }, { "epoch": 10.955172413793104, "grad_norm": 0.9738144874572754, "learning_rate": 1.3133793103448277e-05, "loss": 0.3808, "step": 3177 }, { "epoch": 10.958620689655172, "grad_norm": 1.0059173107147217, "learning_rate": 1.3137931034482759e-05, "loss": 0.3437, "step": 3178 }, { "epoch": 10.962068965517242, "grad_norm": 1.1709898710250854, "learning_rate": 1.3142068965517242e-05, "loss": 0.3637, "step": 3179 }, { "epoch": 10.96551724137931, "grad_norm": 0.9164944887161255, "learning_rate": 1.3146206896551724e-05, "loss": 0.3335, "step": 3180 }, { "epoch": 10.96896551724138, "grad_norm": 0.8202800154685974, "learning_rate": 1.3150344827586208e-05, "loss": 0.3257, "step": 3181 }, { "epoch": 10.972413793103449, "grad_norm": 1.121233582496643, "learning_rate": 1.315448275862069e-05, "loss": 0.2941, "step": 3182 }, { "epoch": 10.975862068965517, "grad_norm": 1.0652177333831787, "learning_rate": 1.3158620689655174e-05, "loss": 0.3692, "step": 3183 }, { "epoch": 10.979310344827587, "grad_norm": 0.9900358319282532, "learning_rate": 1.3162758620689656e-05, "loss": 0.3472, "step": 3184 }, { "epoch": 10.982758620689655, "grad_norm": 1.5533225536346436, "learning_rate": 1.3166896551724138e-05, "loss": 0.3255, "step": 3185 }, { "epoch": 10.986206896551725, "grad_norm": 1.1987881660461426, "learning_rate": 1.317103448275862e-05, "loss": 0.3292, "step": 3186 }, { "epoch": 10.989655172413793, "grad_norm": 3.0499038696289062, "learning_rate": 1.3175172413793103e-05, "loss": 0.3418, "step": 3187 }, { "epoch": 10.993103448275862, "grad_norm": 1.161592721939087, "learning_rate": 1.3179310344827586e-05, "loss": 0.3568, "step": 3188 }, { "epoch": 10.99655172413793, "grad_norm": 2.065718173980713, "learning_rate": 1.318344827586207e-05, "loss": 0.3953, "step": 3189 }, { "epoch": 11.0, "grad_norm": 1.6698954105377197, "learning_rate": 1.3187586206896551e-05, "loss": 0.5386, "step": 3190 }, { "epoch": 11.00344827586207, "grad_norm": 1.4397395849227905, "learning_rate": 1.3191724137931035e-05, "loss": 0.3712, "step": 3191 }, { "epoch": 11.006896551724138, "grad_norm": 1.0469975471496582, "learning_rate": 1.3195862068965517e-05, "loss": 0.3892, "step": 3192 }, { "epoch": 11.010344827586207, "grad_norm": 1.1158932447433472, "learning_rate": 1.32e-05, "loss": 0.3676, "step": 3193 }, { "epoch": 11.013793103448275, "grad_norm": 0.8661589026451111, "learning_rate": 1.3204137931034483e-05, "loss": 0.3253, "step": 3194 }, { "epoch": 11.017241379310345, "grad_norm": 0.887975811958313, "learning_rate": 1.3208275862068966e-05, "loss": 0.3089, "step": 3195 }, { "epoch": 11.020689655172413, "grad_norm": 0.806361734867096, "learning_rate": 1.321241379310345e-05, "loss": 0.3502, "step": 3196 }, { "epoch": 11.024137931034483, "grad_norm": 0.8802788257598877, "learning_rate": 1.3216551724137932e-05, "loss": 0.3131, "step": 3197 }, { "epoch": 11.027586206896551, "grad_norm": 1.1776249408721924, "learning_rate": 1.3220689655172414e-05, "loss": 0.3614, "step": 3198 }, { "epoch": 11.03103448275862, "grad_norm": 0.930182695388794, "learning_rate": 1.3224827586206896e-05, "loss": 0.365, "step": 3199 }, { "epoch": 11.03448275862069, "grad_norm": 1.009589672088623, "learning_rate": 1.322896551724138e-05, "loss": 0.3169, "step": 3200 }, { "epoch": 11.037931034482758, "grad_norm": 1.4381179809570312, "learning_rate": 1.3233103448275862e-05, "loss": 0.3693, "step": 3201 }, { "epoch": 11.041379310344828, "grad_norm": 1.0246436595916748, "learning_rate": 1.3237241379310346e-05, "loss": 0.3819, "step": 3202 }, { "epoch": 11.044827586206896, "grad_norm": 1.1812119483947754, "learning_rate": 1.3241379310344828e-05, "loss": 0.35, "step": 3203 }, { "epoch": 11.048275862068966, "grad_norm": 1.1525590419769287, "learning_rate": 1.3245517241379311e-05, "loss": 0.3229, "step": 3204 }, { "epoch": 11.051724137931034, "grad_norm": 1.1732746362686157, "learning_rate": 1.3249655172413793e-05, "loss": 0.3374, "step": 3205 }, { "epoch": 11.055172413793104, "grad_norm": 1.044477105140686, "learning_rate": 1.3253793103448277e-05, "loss": 0.312, "step": 3206 }, { "epoch": 11.058620689655172, "grad_norm": 1.6311097145080566, "learning_rate": 1.3257931034482759e-05, "loss": 0.334, "step": 3207 }, { "epoch": 11.062068965517241, "grad_norm": 0.9125072956085205, "learning_rate": 1.3262068965517243e-05, "loss": 0.3183, "step": 3208 }, { "epoch": 11.065517241379311, "grad_norm": 1.1404973268508911, "learning_rate": 1.3266206896551725e-05, "loss": 0.339, "step": 3209 }, { "epoch": 11.068965517241379, "grad_norm": 1.424001693725586, "learning_rate": 1.3270344827586209e-05, "loss": 0.391, "step": 3210 }, { "epoch": 11.072413793103449, "grad_norm": 1.5109606981277466, "learning_rate": 1.3274482758620689e-05, "loss": 0.3686, "step": 3211 }, { "epoch": 11.075862068965517, "grad_norm": 1.2143325805664062, "learning_rate": 1.3278620689655173e-05, "loss": 0.3566, "step": 3212 }, { "epoch": 11.079310344827586, "grad_norm": 1.4759670495986938, "learning_rate": 1.3282758620689655e-05, "loss": 0.3617, "step": 3213 }, { "epoch": 11.082758620689654, "grad_norm": 1.5273007154464722, "learning_rate": 1.3286896551724138e-05, "loss": 0.3961, "step": 3214 }, { "epoch": 11.086206896551724, "grad_norm": 2.718690872192383, "learning_rate": 1.329103448275862e-05, "loss": 0.4712, "step": 3215 }, { "epoch": 11.089655172413794, "grad_norm": 0.8575903177261353, "learning_rate": 1.3295172413793104e-05, "loss": 0.3614, "step": 3216 }, { "epoch": 11.093103448275862, "grad_norm": 0.723325788974762, "learning_rate": 1.3299310344827586e-05, "loss": 0.3153, "step": 3217 }, { "epoch": 11.096551724137932, "grad_norm": 0.6935354471206665, "learning_rate": 1.330344827586207e-05, "loss": 0.3347, "step": 3218 }, { "epoch": 11.1, "grad_norm": 1.576832890510559, "learning_rate": 1.3307586206896552e-05, "loss": 0.3083, "step": 3219 }, { "epoch": 11.10344827586207, "grad_norm": 1.042279839515686, "learning_rate": 1.3311724137931035e-05, "loss": 0.3362, "step": 3220 }, { "epoch": 11.106896551724137, "grad_norm": 1.039164662361145, "learning_rate": 1.3315862068965518e-05, "loss": 0.3338, "step": 3221 }, { "epoch": 11.110344827586207, "grad_norm": 1.4294403791427612, "learning_rate": 1.3320000000000001e-05, "loss": 0.3778, "step": 3222 }, { "epoch": 11.113793103448275, "grad_norm": 1.0155497789382935, "learning_rate": 1.3324137931034483e-05, "loss": 0.3154, "step": 3223 }, { "epoch": 11.117241379310345, "grad_norm": 0.7955037951469421, "learning_rate": 1.3328275862068965e-05, "loss": 0.3381, "step": 3224 }, { "epoch": 11.120689655172415, "grad_norm": 1.1088993549346924, "learning_rate": 1.3332413793103447e-05, "loss": 0.3104, "step": 3225 }, { "epoch": 11.124137931034483, "grad_norm": 1.1683722734451294, "learning_rate": 1.3336551724137931e-05, "loss": 0.338, "step": 3226 }, { "epoch": 11.127586206896552, "grad_norm": 1.1208536624908447, "learning_rate": 1.3340689655172413e-05, "loss": 0.3796, "step": 3227 }, { "epoch": 11.13103448275862, "grad_norm": 3.7126126289367676, "learning_rate": 1.3344827586206897e-05, "loss": 0.2981, "step": 3228 }, { "epoch": 11.13448275862069, "grad_norm": 0.9971523284912109, "learning_rate": 1.334896551724138e-05, "loss": 0.3238, "step": 3229 }, { "epoch": 11.137931034482758, "grad_norm": 1.7032818794250488, "learning_rate": 1.3353103448275862e-05, "loss": 0.36, "step": 3230 }, { "epoch": 11.141379310344828, "grad_norm": 1.0740052461624146, "learning_rate": 1.3357241379310346e-05, "loss": 0.3461, "step": 3231 }, { "epoch": 11.144827586206896, "grad_norm": 1.0368099212646484, "learning_rate": 1.3361379310344828e-05, "loss": 0.3577, "step": 3232 }, { "epoch": 11.148275862068965, "grad_norm": 2.459246873855591, "learning_rate": 1.3365517241379312e-05, "loss": 0.3424, "step": 3233 }, { "epoch": 11.151724137931035, "grad_norm": 0.9375048279762268, "learning_rate": 1.3369655172413794e-05, "loss": 0.3247, "step": 3234 }, { "epoch": 11.155172413793103, "grad_norm": 2.2258567810058594, "learning_rate": 1.3373793103448278e-05, "loss": 0.3595, "step": 3235 }, { "epoch": 11.158620689655173, "grad_norm": 1.7222180366516113, "learning_rate": 1.3377931034482758e-05, "loss": 0.377, "step": 3236 }, { "epoch": 11.162068965517241, "grad_norm": 1.3219709396362305, "learning_rate": 1.3382068965517242e-05, "loss": 0.3416, "step": 3237 }, { "epoch": 11.16551724137931, "grad_norm": 2.215054988861084, "learning_rate": 1.3386206896551724e-05, "loss": 0.3434, "step": 3238 }, { "epoch": 11.168965517241379, "grad_norm": 1.5006215572357178, "learning_rate": 1.3390344827586207e-05, "loss": 0.4007, "step": 3239 }, { "epoch": 11.172413793103448, "grad_norm": 2.9251372814178467, "learning_rate": 1.339448275862069e-05, "loss": 0.5698, "step": 3240 }, { "epoch": 11.175862068965516, "grad_norm": 1.0414310693740845, "learning_rate": 1.3398620689655173e-05, "loss": 0.3642, "step": 3241 }, { "epoch": 11.179310344827586, "grad_norm": 2.2657060623168945, "learning_rate": 1.3402758620689655e-05, "loss": 0.316, "step": 3242 }, { "epoch": 11.182758620689656, "grad_norm": 0.7700024247169495, "learning_rate": 1.3406896551724139e-05, "loss": 0.3583, "step": 3243 }, { "epoch": 11.186206896551724, "grad_norm": 1.5030680894851685, "learning_rate": 1.3411034482758621e-05, "loss": 0.3489, "step": 3244 }, { "epoch": 11.189655172413794, "grad_norm": 1.041690707206726, "learning_rate": 1.3415172413793105e-05, "loss": 0.326, "step": 3245 }, { "epoch": 11.193103448275862, "grad_norm": 0.8587378263473511, "learning_rate": 1.3419310344827587e-05, "loss": 0.3745, "step": 3246 }, { "epoch": 11.196551724137931, "grad_norm": 0.9657180905342102, "learning_rate": 1.342344827586207e-05, "loss": 0.3596, "step": 3247 }, { "epoch": 11.2, "grad_norm": 0.7317667007446289, "learning_rate": 1.3427586206896552e-05, "loss": 0.3142, "step": 3248 }, { "epoch": 11.203448275862069, "grad_norm": 1.0382143259048462, "learning_rate": 1.3431724137931034e-05, "loss": 0.3227, "step": 3249 }, { "epoch": 11.206896551724139, "grad_norm": 1.2533258199691772, "learning_rate": 1.3435862068965516e-05, "loss": 0.3092, "step": 3250 }, { "epoch": 11.210344827586207, "grad_norm": 1.010066032409668, "learning_rate": 1.344e-05, "loss": 0.325, "step": 3251 }, { "epoch": 11.213793103448277, "grad_norm": 0.7321310639381409, "learning_rate": 1.3444137931034482e-05, "loss": 0.2975, "step": 3252 }, { "epoch": 11.217241379310344, "grad_norm": 1.0440282821655273, "learning_rate": 1.3448275862068966e-05, "loss": 0.3317, "step": 3253 }, { "epoch": 11.220689655172414, "grad_norm": 5.05422306060791, "learning_rate": 1.3452413793103448e-05, "loss": 0.3479, "step": 3254 }, { "epoch": 11.224137931034482, "grad_norm": 1.1068676710128784, "learning_rate": 1.3456551724137932e-05, "loss": 0.3608, "step": 3255 }, { "epoch": 11.227586206896552, "grad_norm": 1.3600422143936157, "learning_rate": 1.3460689655172414e-05, "loss": 0.3218, "step": 3256 }, { "epoch": 11.23103448275862, "grad_norm": 1.1454195976257324, "learning_rate": 1.3464827586206897e-05, "loss": 0.3166, "step": 3257 }, { "epoch": 11.23448275862069, "grad_norm": 1.0803663730621338, "learning_rate": 1.346896551724138e-05, "loss": 0.313, "step": 3258 }, { "epoch": 11.23793103448276, "grad_norm": 3.1906261444091797, "learning_rate": 1.3473103448275863e-05, "loss": 0.3483, "step": 3259 }, { "epoch": 11.241379310344827, "grad_norm": 1.4799070358276367, "learning_rate": 1.3477241379310345e-05, "loss": 0.3111, "step": 3260 }, { "epoch": 11.244827586206897, "grad_norm": 1.1131500005722046, "learning_rate": 1.3481379310344829e-05, "loss": 0.359, "step": 3261 }, { "epoch": 11.248275862068965, "grad_norm": 1.5620731115341187, "learning_rate": 1.3485517241379309e-05, "loss": 0.3591, "step": 3262 }, { "epoch": 11.251724137931035, "grad_norm": 1.189218282699585, "learning_rate": 1.3489655172413793e-05, "loss": 0.3565, "step": 3263 }, { "epoch": 11.255172413793103, "grad_norm": 1.3603081703186035, "learning_rate": 1.3493793103448276e-05, "loss": 0.4243, "step": 3264 }, { "epoch": 11.258620689655173, "grad_norm": 4.741518974304199, "learning_rate": 1.3497931034482758e-05, "loss": 0.5096, "step": 3265 }, { "epoch": 11.26206896551724, "grad_norm": 1.1361892223358154, "learning_rate": 1.3502068965517242e-05, "loss": 0.3561, "step": 3266 }, { "epoch": 11.26551724137931, "grad_norm": 0.9990695118904114, "learning_rate": 1.3506206896551724e-05, "loss": 0.3446, "step": 3267 }, { "epoch": 11.26896551724138, "grad_norm": 0.7361863851547241, "learning_rate": 1.3510344827586208e-05, "loss": 0.3409, "step": 3268 }, { "epoch": 11.272413793103448, "grad_norm": 0.9077271819114685, "learning_rate": 1.351448275862069e-05, "loss": 0.3371, "step": 3269 }, { "epoch": 11.275862068965518, "grad_norm": 0.8163717985153198, "learning_rate": 1.3518620689655174e-05, "loss": 0.3186, "step": 3270 }, { "epoch": 11.279310344827586, "grad_norm": 0.6644508838653564, "learning_rate": 1.3522758620689656e-05, "loss": 0.3171, "step": 3271 }, { "epoch": 11.282758620689656, "grad_norm": 0.9734000563621521, "learning_rate": 1.352689655172414e-05, "loss": 0.3715, "step": 3272 }, { "epoch": 11.286206896551723, "grad_norm": 1.2003371715545654, "learning_rate": 1.3531034482758621e-05, "loss": 0.3573, "step": 3273 }, { "epoch": 11.289655172413793, "grad_norm": 0.9708840847015381, "learning_rate": 1.3535172413793105e-05, "loss": 0.3382, "step": 3274 }, { "epoch": 11.293103448275861, "grad_norm": 0.7827529311180115, "learning_rate": 1.3539310344827585e-05, "loss": 0.329, "step": 3275 }, { "epoch": 11.296551724137931, "grad_norm": 0.912108838558197, "learning_rate": 1.354344827586207e-05, "loss": 0.3401, "step": 3276 }, { "epoch": 11.3, "grad_norm": 1.0015093088150024, "learning_rate": 1.3547586206896551e-05, "loss": 0.3658, "step": 3277 }, { "epoch": 11.303448275862069, "grad_norm": 1.4959125518798828, "learning_rate": 1.3551724137931035e-05, "loss": 0.2871, "step": 3278 }, { "epoch": 11.306896551724138, "grad_norm": 1.7901791334152222, "learning_rate": 1.3555862068965517e-05, "loss": 0.3338, "step": 3279 }, { "epoch": 11.310344827586206, "grad_norm": 1.0677779912948608, "learning_rate": 1.356e-05, "loss": 0.3214, "step": 3280 }, { "epoch": 11.313793103448276, "grad_norm": 1.1892796754837036, "learning_rate": 1.3564137931034483e-05, "loss": 0.3595, "step": 3281 }, { "epoch": 11.317241379310344, "grad_norm": 0.7852084040641785, "learning_rate": 1.3568275862068966e-05, "loss": 0.3509, "step": 3282 }, { "epoch": 11.320689655172414, "grad_norm": 1.6213786602020264, "learning_rate": 1.3572413793103448e-05, "loss": 0.32, "step": 3283 }, { "epoch": 11.324137931034482, "grad_norm": 0.9101031422615051, "learning_rate": 1.3576551724137932e-05, "loss": 0.3226, "step": 3284 }, { "epoch": 11.327586206896552, "grad_norm": 1.4252811670303345, "learning_rate": 1.3580689655172414e-05, "loss": 0.3267, "step": 3285 }, { "epoch": 11.331034482758621, "grad_norm": 1.6991486549377441, "learning_rate": 1.3584827586206898e-05, "loss": 0.3592, "step": 3286 }, { "epoch": 11.33448275862069, "grad_norm": 1.0188894271850586, "learning_rate": 1.358896551724138e-05, "loss": 0.3419, "step": 3287 }, { "epoch": 11.337931034482759, "grad_norm": 1.1324188709259033, "learning_rate": 1.3593103448275862e-05, "loss": 0.3508, "step": 3288 }, { "epoch": 11.341379310344827, "grad_norm": 1.8553287982940674, "learning_rate": 1.3597241379310344e-05, "loss": 0.396, "step": 3289 }, { "epoch": 11.344827586206897, "grad_norm": 2.5189621448516846, "learning_rate": 1.3601379310344828e-05, "loss": 0.4924, "step": 3290 }, { "epoch": 11.348275862068965, "grad_norm": 0.6747798323631287, "learning_rate": 1.360551724137931e-05, "loss": 0.361, "step": 3291 }, { "epoch": 11.351724137931035, "grad_norm": 0.6740288734436035, "learning_rate": 1.3609655172413793e-05, "loss": 0.352, "step": 3292 }, { "epoch": 11.355172413793104, "grad_norm": 1.0479086637496948, "learning_rate": 1.3613793103448275e-05, "loss": 0.335, "step": 3293 }, { "epoch": 11.358620689655172, "grad_norm": 0.8120093941688538, "learning_rate": 1.3617931034482759e-05, "loss": 0.3317, "step": 3294 }, { "epoch": 11.362068965517242, "grad_norm": 0.7238280177116394, "learning_rate": 1.3622068965517241e-05, "loss": 0.3665, "step": 3295 }, { "epoch": 11.36551724137931, "grad_norm": 1.1168516874313354, "learning_rate": 1.3626206896551725e-05, "loss": 0.3339, "step": 3296 }, { "epoch": 11.36896551724138, "grad_norm": 0.8043643236160278, "learning_rate": 1.3630344827586208e-05, "loss": 0.299, "step": 3297 }, { "epoch": 11.372413793103448, "grad_norm": 0.8797531127929688, "learning_rate": 1.363448275862069e-05, "loss": 0.3638, "step": 3298 }, { "epoch": 11.375862068965517, "grad_norm": 1.0785415172576904, "learning_rate": 1.3638620689655174e-05, "loss": 0.2987, "step": 3299 }, { "epoch": 11.379310344827585, "grad_norm": 0.8798601031303406, "learning_rate": 1.3642758620689656e-05, "loss": 0.3454, "step": 3300 }, { "epoch": 11.382758620689655, "grad_norm": 0.8822892904281616, "learning_rate": 1.3646896551724138e-05, "loss": 0.3257, "step": 3301 }, { "epoch": 11.386206896551725, "grad_norm": 0.9360131025314331, "learning_rate": 1.365103448275862e-05, "loss": 0.3483, "step": 3302 }, { "epoch": 11.389655172413793, "grad_norm": 1.552223563194275, "learning_rate": 1.3655172413793104e-05, "loss": 0.3221, "step": 3303 }, { "epoch": 11.393103448275863, "grad_norm": 0.8967574238777161, "learning_rate": 1.3659310344827586e-05, "loss": 0.3141, "step": 3304 }, { "epoch": 11.39655172413793, "grad_norm": 1.3028008937835693, "learning_rate": 1.366344827586207e-05, "loss": 0.3462, "step": 3305 }, { "epoch": 11.4, "grad_norm": 1.2722865343093872, "learning_rate": 1.3667586206896552e-05, "loss": 0.3148, "step": 3306 }, { "epoch": 11.403448275862068, "grad_norm": 1.2922803163528442, "learning_rate": 1.3671724137931035e-05, "loss": 0.3258, "step": 3307 }, { "epoch": 11.406896551724138, "grad_norm": 1.402710199356079, "learning_rate": 1.3675862068965517e-05, "loss": 0.345, "step": 3308 }, { "epoch": 11.410344827586206, "grad_norm": 0.955409049987793, "learning_rate": 1.3680000000000001e-05, "loss": 0.3443, "step": 3309 }, { "epoch": 11.413793103448276, "grad_norm": 1.138731598854065, "learning_rate": 1.3684137931034483e-05, "loss": 0.3201, "step": 3310 }, { "epoch": 11.417241379310346, "grad_norm": 1.1464985609054565, "learning_rate": 1.3688275862068967e-05, "loss": 0.3539, "step": 3311 }, { "epoch": 11.420689655172414, "grad_norm": 1.2213330268859863, "learning_rate": 1.3692413793103449e-05, "loss": 0.3471, "step": 3312 }, { "epoch": 11.424137931034483, "grad_norm": 1.4652957916259766, "learning_rate": 1.3696551724137933e-05, "loss": 0.4105, "step": 3313 }, { "epoch": 11.427586206896551, "grad_norm": 1.3127925395965576, "learning_rate": 1.3700689655172413e-05, "loss": 0.4413, "step": 3314 }, { "epoch": 11.431034482758621, "grad_norm": 1.9497499465942383, "learning_rate": 1.3704827586206897e-05, "loss": 0.5192, "step": 3315 }, { "epoch": 11.434482758620689, "grad_norm": 0.8001726865768433, "learning_rate": 1.3708965517241379e-05, "loss": 0.373, "step": 3316 }, { "epoch": 11.437931034482759, "grad_norm": 0.8624141812324524, "learning_rate": 1.3713103448275862e-05, "loss": 0.308, "step": 3317 }, { "epoch": 11.441379310344828, "grad_norm": 0.7850614190101624, "learning_rate": 1.3717241379310344e-05, "loss": 0.3226, "step": 3318 }, { "epoch": 11.444827586206896, "grad_norm": 0.8957758545875549, "learning_rate": 1.3721379310344828e-05, "loss": 0.3808, "step": 3319 }, { "epoch": 11.448275862068966, "grad_norm": 1.3020302057266235, "learning_rate": 1.372551724137931e-05, "loss": 0.3302, "step": 3320 }, { "epoch": 11.451724137931034, "grad_norm": 0.829849898815155, "learning_rate": 1.3729655172413794e-05, "loss": 0.3229, "step": 3321 }, { "epoch": 11.455172413793104, "grad_norm": 0.9412949085235596, "learning_rate": 1.3733793103448276e-05, "loss": 0.3753, "step": 3322 }, { "epoch": 11.458620689655172, "grad_norm": 0.8389825820922852, "learning_rate": 1.373793103448276e-05, "loss": 0.3412, "step": 3323 }, { "epoch": 11.462068965517242, "grad_norm": 0.9633886814117432, "learning_rate": 1.3742068965517242e-05, "loss": 0.3253, "step": 3324 }, { "epoch": 11.46551724137931, "grad_norm": 1.1741843223571777, "learning_rate": 1.3746206896551725e-05, "loss": 0.3292, "step": 3325 }, { "epoch": 11.46896551724138, "grad_norm": 0.7436055541038513, "learning_rate": 1.3750344827586207e-05, "loss": 0.3453, "step": 3326 }, { "epoch": 11.472413793103449, "grad_norm": 0.8318148255348206, "learning_rate": 1.375448275862069e-05, "loss": 0.343, "step": 3327 }, { "epoch": 11.475862068965517, "grad_norm": 0.8691243529319763, "learning_rate": 1.3758620689655171e-05, "loss": 0.3599, "step": 3328 }, { "epoch": 11.479310344827587, "grad_norm": 1.041658878326416, "learning_rate": 1.3762758620689655e-05, "loss": 0.3506, "step": 3329 }, { "epoch": 11.482758620689655, "grad_norm": 0.9654300212860107, "learning_rate": 1.3766896551724139e-05, "loss": 0.3184, "step": 3330 }, { "epoch": 11.486206896551725, "grad_norm": 1.0803145170211792, "learning_rate": 1.377103448275862e-05, "loss": 0.3405, "step": 3331 }, { "epoch": 11.489655172413793, "grad_norm": 1.4602540731430054, "learning_rate": 1.3775172413793105e-05, "loss": 0.3251, "step": 3332 }, { "epoch": 11.493103448275862, "grad_norm": 0.965764045715332, "learning_rate": 1.3779310344827587e-05, "loss": 0.3639, "step": 3333 }, { "epoch": 11.49655172413793, "grad_norm": 1.4181759357452393, "learning_rate": 1.378344827586207e-05, "loss": 0.3572, "step": 3334 }, { "epoch": 11.5, "grad_norm": 1.273405909538269, "learning_rate": 1.3787586206896552e-05, "loss": 0.33, "step": 3335 }, { "epoch": 11.50344827586207, "grad_norm": 1.2741265296936035, "learning_rate": 1.3791724137931036e-05, "loss": 0.351, "step": 3336 }, { "epoch": 11.506896551724138, "grad_norm": 1.497555136680603, "learning_rate": 1.3795862068965518e-05, "loss": 0.3198, "step": 3337 }, { "epoch": 11.510344827586207, "grad_norm": 1.1657476425170898, "learning_rate": 1.3800000000000002e-05, "loss": 0.3595, "step": 3338 }, { "epoch": 11.513793103448275, "grad_norm": 1.2922276258468628, "learning_rate": 1.3804137931034482e-05, "loss": 0.4158, "step": 3339 }, { "epoch": 11.517241379310345, "grad_norm": 2.0430259704589844, "learning_rate": 1.3808275862068966e-05, "loss": 0.4613, "step": 3340 }, { "epoch": 11.520689655172413, "grad_norm": 1.0750842094421387, "learning_rate": 1.3812413793103448e-05, "loss": 0.4326, "step": 3341 }, { "epoch": 11.524137931034483, "grad_norm": 0.8288525938987732, "learning_rate": 1.3816551724137931e-05, "loss": 0.3769, "step": 3342 }, { "epoch": 11.527586206896551, "grad_norm": 0.792072057723999, "learning_rate": 1.3820689655172413e-05, "loss": 0.3362, "step": 3343 }, { "epoch": 11.53103448275862, "grad_norm": 0.7584896087646484, "learning_rate": 1.3824827586206897e-05, "loss": 0.319, "step": 3344 }, { "epoch": 11.53448275862069, "grad_norm": 0.7847803235054016, "learning_rate": 1.382896551724138e-05, "loss": 0.3461, "step": 3345 }, { "epoch": 11.537931034482758, "grad_norm": 0.7675182819366455, "learning_rate": 1.3833103448275863e-05, "loss": 0.3164, "step": 3346 }, { "epoch": 11.541379310344828, "grad_norm": 0.8425546884536743, "learning_rate": 1.3837241379310345e-05, "loss": 0.3119, "step": 3347 }, { "epoch": 11.544827586206896, "grad_norm": 0.7618127465248108, "learning_rate": 1.3841379310344829e-05, "loss": 0.343, "step": 3348 }, { "epoch": 11.548275862068966, "grad_norm": 1.706191897392273, "learning_rate": 1.384551724137931e-05, "loss": 0.3084, "step": 3349 }, { "epoch": 11.551724137931034, "grad_norm": 0.9873750805854797, "learning_rate": 1.3849655172413794e-05, "loss": 0.3261, "step": 3350 }, { "epoch": 11.555172413793104, "grad_norm": 1.0222564935684204, "learning_rate": 1.3853793103448276e-05, "loss": 0.316, "step": 3351 }, { "epoch": 11.558620689655172, "grad_norm": 2.5476012229919434, "learning_rate": 1.3857931034482758e-05, "loss": 0.3284, "step": 3352 }, { "epoch": 11.562068965517241, "grad_norm": 1.1234626770019531, "learning_rate": 1.386206896551724e-05, "loss": 0.3404, "step": 3353 }, { "epoch": 11.565517241379311, "grad_norm": 1.0146490335464478, "learning_rate": 1.3866206896551724e-05, "loss": 0.3111, "step": 3354 }, { "epoch": 11.568965517241379, "grad_norm": 2.054180383682251, "learning_rate": 1.3870344827586206e-05, "loss": 0.296, "step": 3355 }, { "epoch": 11.572413793103449, "grad_norm": 1.1721282005310059, "learning_rate": 1.387448275862069e-05, "loss": 0.3418, "step": 3356 }, { "epoch": 11.575862068965517, "grad_norm": 1.0522756576538086, "learning_rate": 1.3878620689655172e-05, "loss": 0.356, "step": 3357 }, { "epoch": 11.579310344827586, "grad_norm": 1.0960115194320679, "learning_rate": 1.3882758620689656e-05, "loss": 0.3525, "step": 3358 }, { "epoch": 11.582758620689654, "grad_norm": 1.1422805786132812, "learning_rate": 1.3886896551724138e-05, "loss": 0.3281, "step": 3359 }, { "epoch": 11.586206896551724, "grad_norm": 1.0512808561325073, "learning_rate": 1.3891034482758621e-05, "loss": 0.3661, "step": 3360 }, { "epoch": 11.589655172413792, "grad_norm": 1.187052845954895, "learning_rate": 1.3895172413793103e-05, "loss": 0.3495, "step": 3361 }, { "epoch": 11.593103448275862, "grad_norm": 1.1517788171768188, "learning_rate": 1.3899310344827587e-05, "loss": 0.3607, "step": 3362 }, { "epoch": 11.596551724137932, "grad_norm": 1.2795299291610718, "learning_rate": 1.390344827586207e-05, "loss": 0.3536, "step": 3363 }, { "epoch": 11.6, "grad_norm": 1.953080415725708, "learning_rate": 1.3907586206896553e-05, "loss": 0.3469, "step": 3364 }, { "epoch": 11.60344827586207, "grad_norm": 2.13566255569458, "learning_rate": 1.3911724137931035e-05, "loss": 0.4618, "step": 3365 }, { "epoch": 11.606896551724137, "grad_norm": 0.8891032934188843, "learning_rate": 1.3915862068965517e-05, "loss": 0.4062, "step": 3366 }, { "epoch": 11.610344827586207, "grad_norm": 0.7980998754501343, "learning_rate": 1.392e-05, "loss": 0.374, "step": 3367 }, { "epoch": 11.613793103448275, "grad_norm": 0.9827781915664673, "learning_rate": 1.3924137931034483e-05, "loss": 0.3352, "step": 3368 }, { "epoch": 11.617241379310345, "grad_norm": 0.9528266191482544, "learning_rate": 1.3928275862068966e-05, "loss": 0.3885, "step": 3369 }, { "epoch": 11.620689655172415, "grad_norm": 2.095320224761963, "learning_rate": 1.3932413793103448e-05, "loss": 0.3333, "step": 3370 }, { "epoch": 11.624137931034483, "grad_norm": 0.7808279991149902, "learning_rate": 1.3936551724137932e-05, "loss": 0.2943, "step": 3371 }, { "epoch": 11.627586206896552, "grad_norm": 0.8417533040046692, "learning_rate": 1.3940689655172414e-05, "loss": 0.3431, "step": 3372 }, { "epoch": 11.63103448275862, "grad_norm": 0.7607225179672241, "learning_rate": 1.3944827586206898e-05, "loss": 0.2967, "step": 3373 }, { "epoch": 11.63448275862069, "grad_norm": 0.9182493686676025, "learning_rate": 1.394896551724138e-05, "loss": 0.3154, "step": 3374 }, { "epoch": 11.637931034482758, "grad_norm": 0.7232106924057007, "learning_rate": 1.3953103448275863e-05, "loss": 0.317, "step": 3375 }, { "epoch": 11.641379310344828, "grad_norm": 0.8541457056999207, "learning_rate": 1.3957241379310345e-05, "loss": 0.3417, "step": 3376 }, { "epoch": 11.644827586206896, "grad_norm": 1.042543649673462, "learning_rate": 1.396137931034483e-05, "loss": 0.34, "step": 3377 }, { "epoch": 11.648275862068965, "grad_norm": 0.9095512628555298, "learning_rate": 1.396551724137931e-05, "loss": 0.2856, "step": 3378 }, { "epoch": 11.651724137931035, "grad_norm": 0.8184096813201904, "learning_rate": 1.3969655172413793e-05, "loss": 0.3327, "step": 3379 }, { "epoch": 11.655172413793103, "grad_norm": 0.9785550832748413, "learning_rate": 1.3973793103448275e-05, "loss": 0.3951, "step": 3380 }, { "epoch": 11.658620689655173, "grad_norm": 1.1087241172790527, "learning_rate": 1.3977931034482759e-05, "loss": 0.3215, "step": 3381 }, { "epoch": 11.662068965517241, "grad_norm": 0.8940266370773315, "learning_rate": 1.3982068965517241e-05, "loss": 0.3358, "step": 3382 }, { "epoch": 11.66551724137931, "grad_norm": 0.9942477941513062, "learning_rate": 1.3986206896551725e-05, "loss": 0.3501, "step": 3383 }, { "epoch": 11.668965517241379, "grad_norm": 0.8027575612068176, "learning_rate": 1.3990344827586207e-05, "loss": 0.3399, "step": 3384 }, { "epoch": 11.672413793103448, "grad_norm": 1.822160243988037, "learning_rate": 1.399448275862069e-05, "loss": 0.3228, "step": 3385 }, { "epoch": 11.675862068965518, "grad_norm": 1.500719428062439, "learning_rate": 1.3998620689655172e-05, "loss": 0.3967, "step": 3386 }, { "epoch": 11.679310344827586, "grad_norm": 1.291251540184021, "learning_rate": 1.4002758620689656e-05, "loss": 0.3372, "step": 3387 }, { "epoch": 11.682758620689656, "grad_norm": 1.407959222793579, "learning_rate": 1.4006896551724138e-05, "loss": 0.3744, "step": 3388 }, { "epoch": 11.686206896551724, "grad_norm": 2.9609432220458984, "learning_rate": 1.4011034482758622e-05, "loss": 0.3661, "step": 3389 }, { "epoch": 11.689655172413794, "grad_norm": 8.311430931091309, "learning_rate": 1.4015172413793104e-05, "loss": 0.5343, "step": 3390 }, { "epoch": 11.693103448275862, "grad_norm": 1.1006640195846558, "learning_rate": 1.4019310344827586e-05, "loss": 0.4039, "step": 3391 }, { "epoch": 11.696551724137931, "grad_norm": 1.5307661294937134, "learning_rate": 1.4023448275862068e-05, "loss": 0.3528, "step": 3392 }, { "epoch": 11.7, "grad_norm": 0.8762801289558411, "learning_rate": 1.4027586206896552e-05, "loss": 0.3763, "step": 3393 }, { "epoch": 11.703448275862069, "grad_norm": 1.8806381225585938, "learning_rate": 1.4031724137931034e-05, "loss": 0.3205, "step": 3394 }, { "epoch": 11.706896551724139, "grad_norm": 0.6327629089355469, "learning_rate": 1.4035862068965517e-05, "loss": 0.3586, "step": 3395 }, { "epoch": 11.710344827586207, "grad_norm": 0.8424249291419983, "learning_rate": 1.4040000000000001e-05, "loss": 0.3331, "step": 3396 }, { "epoch": 11.713793103448277, "grad_norm": 0.841160237789154, "learning_rate": 1.4044137931034483e-05, "loss": 0.337, "step": 3397 }, { "epoch": 11.717241379310344, "grad_norm": 0.7549057006835938, "learning_rate": 1.4048275862068967e-05, "loss": 0.3535, "step": 3398 }, { "epoch": 11.720689655172414, "grad_norm": 0.9438590407371521, "learning_rate": 1.4052413793103449e-05, "loss": 0.3021, "step": 3399 }, { "epoch": 11.724137931034482, "grad_norm": 1.1376750469207764, "learning_rate": 1.4056551724137933e-05, "loss": 0.3369, "step": 3400 }, { "epoch": 11.727586206896552, "grad_norm": 0.8272047638893127, "learning_rate": 1.4060689655172415e-05, "loss": 0.3127, "step": 3401 }, { "epoch": 11.73103448275862, "grad_norm": 0.9681031107902527, "learning_rate": 1.4064827586206898e-05, "loss": 0.3235, "step": 3402 }, { "epoch": 11.73448275862069, "grad_norm": 1.034421682357788, "learning_rate": 1.406896551724138e-05, "loss": 0.3144, "step": 3403 }, { "epoch": 11.73793103448276, "grad_norm": 1.296126127243042, "learning_rate": 1.4073103448275862e-05, "loss": 0.3304, "step": 3404 }, { "epoch": 11.741379310344827, "grad_norm": 0.9887458682060242, "learning_rate": 1.4077241379310344e-05, "loss": 0.3526, "step": 3405 }, { "epoch": 11.744827586206897, "grad_norm": 1.0094449520111084, "learning_rate": 1.4081379310344828e-05, "loss": 0.3041, "step": 3406 }, { "epoch": 11.748275862068965, "grad_norm": 1.5662362575531006, "learning_rate": 1.408551724137931e-05, "loss": 0.3181, "step": 3407 }, { "epoch": 11.751724137931035, "grad_norm": 1.090477705001831, "learning_rate": 1.4089655172413794e-05, "loss": 0.3319, "step": 3408 }, { "epoch": 11.755172413793103, "grad_norm": 1.365688443183899, "learning_rate": 1.4093793103448276e-05, "loss": 0.3616, "step": 3409 }, { "epoch": 11.758620689655173, "grad_norm": 2.0579097270965576, "learning_rate": 1.409793103448276e-05, "loss": 0.3407, "step": 3410 }, { "epoch": 11.76206896551724, "grad_norm": 1.8888977766036987, "learning_rate": 1.4102068965517242e-05, "loss": 0.3618, "step": 3411 }, { "epoch": 11.76551724137931, "grad_norm": 1.1950198411941528, "learning_rate": 1.4106206896551725e-05, "loss": 0.3415, "step": 3412 }, { "epoch": 11.76896551724138, "grad_norm": 1.8042329549789429, "learning_rate": 1.4110344827586207e-05, "loss": 0.384, "step": 3413 }, { "epoch": 11.772413793103448, "grad_norm": 1.7703242301940918, "learning_rate": 1.4114482758620691e-05, "loss": 0.4131, "step": 3414 }, { "epoch": 11.775862068965518, "grad_norm": 2.2965424060821533, "learning_rate": 1.4118620689655173e-05, "loss": 0.484, "step": 3415 }, { "epoch": 11.779310344827586, "grad_norm": 1.0581421852111816, "learning_rate": 1.4122758620689655e-05, "loss": 0.4123, "step": 3416 }, { "epoch": 11.782758620689656, "grad_norm": 0.6045722961425781, "learning_rate": 1.4126896551724137e-05, "loss": 0.3332, "step": 3417 }, { "epoch": 11.786206896551723, "grad_norm": 1.696449875831604, "learning_rate": 1.413103448275862e-05, "loss": 0.3904, "step": 3418 }, { "epoch": 11.789655172413793, "grad_norm": 1.0605107545852661, "learning_rate": 1.4135172413793103e-05, "loss": 0.348, "step": 3419 }, { "epoch": 11.793103448275861, "grad_norm": 0.7830581665039062, "learning_rate": 1.4139310344827586e-05, "loss": 0.3626, "step": 3420 }, { "epoch": 11.796551724137931, "grad_norm": 0.6754023432731628, "learning_rate": 1.4143448275862068e-05, "loss": 0.3309, "step": 3421 }, { "epoch": 11.8, "grad_norm": 0.9752089977264404, "learning_rate": 1.4147586206896552e-05, "loss": 0.3784, "step": 3422 }, { "epoch": 11.803448275862069, "grad_norm": 0.8863078951835632, "learning_rate": 1.4151724137931034e-05, "loss": 0.3361, "step": 3423 }, { "epoch": 11.806896551724138, "grad_norm": 0.8145143985748291, "learning_rate": 1.4155862068965518e-05, "loss": 0.3266, "step": 3424 }, { "epoch": 11.810344827586206, "grad_norm": 0.8000380396842957, "learning_rate": 1.416e-05, "loss": 0.3141, "step": 3425 }, { "epoch": 11.813793103448276, "grad_norm": 1.3757327795028687, "learning_rate": 1.4164137931034484e-05, "loss": 0.3219, "step": 3426 }, { "epoch": 11.817241379310344, "grad_norm": 0.9302356839179993, "learning_rate": 1.4168275862068966e-05, "loss": 0.3072, "step": 3427 }, { "epoch": 11.820689655172414, "grad_norm": 1.3165524005889893, "learning_rate": 1.417241379310345e-05, "loss": 0.3321, "step": 3428 }, { "epoch": 11.824137931034482, "grad_norm": 1.1090344190597534, "learning_rate": 1.4176551724137931e-05, "loss": 0.3214, "step": 3429 }, { "epoch": 11.827586206896552, "grad_norm": 2.821831464767456, "learning_rate": 1.4180689655172413e-05, "loss": 0.333, "step": 3430 }, { "epoch": 11.831034482758621, "grad_norm": 1.4546645879745483, "learning_rate": 1.4184827586206897e-05, "loss": 0.3683, "step": 3431 }, { "epoch": 11.83448275862069, "grad_norm": 1.3667845726013184, "learning_rate": 1.418896551724138e-05, "loss": 0.3345, "step": 3432 }, { "epoch": 11.837931034482759, "grad_norm": 1.4535788297653198, "learning_rate": 1.4193103448275863e-05, "loss": 0.3309, "step": 3433 }, { "epoch": 11.841379310344827, "grad_norm": 1.63959801197052, "learning_rate": 1.4197241379310345e-05, "loss": 0.3357, "step": 3434 }, { "epoch": 11.844827586206897, "grad_norm": 1.008693814277649, "learning_rate": 1.4201379310344829e-05, "loss": 0.3246, "step": 3435 }, { "epoch": 11.848275862068965, "grad_norm": 1.0728907585144043, "learning_rate": 1.420551724137931e-05, "loss": 0.3349, "step": 3436 }, { "epoch": 11.851724137931035, "grad_norm": 1.0511093139648438, "learning_rate": 1.4209655172413794e-05, "loss": 0.3852, "step": 3437 }, { "epoch": 11.855172413793104, "grad_norm": 1.2662994861602783, "learning_rate": 1.4213793103448276e-05, "loss": 0.3745, "step": 3438 }, { "epoch": 11.858620689655172, "grad_norm": 1.710435152053833, "learning_rate": 1.421793103448276e-05, "loss": 0.3684, "step": 3439 }, { "epoch": 11.862068965517242, "grad_norm": 1.8072307109832764, "learning_rate": 1.4222068965517242e-05, "loss": 0.5481, "step": 3440 }, { "epoch": 11.86551724137931, "grad_norm": 0.7996280789375305, "learning_rate": 1.4226206896551726e-05, "loss": 0.4294, "step": 3441 }, { "epoch": 11.86896551724138, "grad_norm": 0.7576360702514648, "learning_rate": 1.4230344827586206e-05, "loss": 0.3531, "step": 3442 }, { "epoch": 11.872413793103448, "grad_norm": 1.610506534576416, "learning_rate": 1.423448275862069e-05, "loss": 0.3982, "step": 3443 }, { "epoch": 11.875862068965517, "grad_norm": 0.8952997326850891, "learning_rate": 1.4238620689655172e-05, "loss": 0.3414, "step": 3444 }, { "epoch": 11.879310344827585, "grad_norm": 0.8412778377532959, "learning_rate": 1.4242758620689656e-05, "loss": 0.3484, "step": 3445 }, { "epoch": 11.882758620689655, "grad_norm": 0.9439750909805298, "learning_rate": 1.4246896551724138e-05, "loss": 0.3565, "step": 3446 }, { "epoch": 11.886206896551725, "grad_norm": 1.0622339248657227, "learning_rate": 1.4251034482758621e-05, "loss": 0.3554, "step": 3447 }, { "epoch": 11.889655172413793, "grad_norm": 0.8241792917251587, "learning_rate": 1.4255172413793103e-05, "loss": 0.3174, "step": 3448 }, { "epoch": 11.893103448275863, "grad_norm": 1.092547059059143, "learning_rate": 1.4259310344827587e-05, "loss": 0.3142, "step": 3449 }, { "epoch": 11.89655172413793, "grad_norm": 1.5645041465759277, "learning_rate": 1.4263448275862069e-05, "loss": 0.3278, "step": 3450 }, { "epoch": 11.9, "grad_norm": 0.8857202529907227, "learning_rate": 1.4267586206896553e-05, "loss": 0.3122, "step": 3451 }, { "epoch": 11.903448275862068, "grad_norm": 0.9157829284667969, "learning_rate": 1.4271724137931035e-05, "loss": 0.3246, "step": 3452 }, { "epoch": 11.906896551724138, "grad_norm": 0.8359509110450745, "learning_rate": 1.4275862068965518e-05, "loss": 0.3647, "step": 3453 }, { "epoch": 11.910344827586208, "grad_norm": 1.7699145078659058, "learning_rate": 1.428e-05, "loss": 0.3792, "step": 3454 }, { "epoch": 11.913793103448276, "grad_norm": 0.9951345920562744, "learning_rate": 1.4284137931034483e-05, "loss": 0.3495, "step": 3455 }, { "epoch": 11.917241379310346, "grad_norm": 0.7462577223777771, "learning_rate": 1.4288275862068965e-05, "loss": 0.336, "step": 3456 }, { "epoch": 11.920689655172414, "grad_norm": 1.3330714702606201, "learning_rate": 1.4292413793103448e-05, "loss": 0.3166, "step": 3457 }, { "epoch": 11.924137931034483, "grad_norm": 0.975191593170166, "learning_rate": 1.429655172413793e-05, "loss": 0.3298, "step": 3458 }, { "epoch": 11.927586206896551, "grad_norm": 1.5789045095443726, "learning_rate": 1.4300689655172414e-05, "loss": 0.3513, "step": 3459 }, { "epoch": 11.931034482758621, "grad_norm": 1.0663460493087769, "learning_rate": 1.4304827586206896e-05, "loss": 0.3335, "step": 3460 }, { "epoch": 11.934482758620689, "grad_norm": 2.303062677383423, "learning_rate": 1.430896551724138e-05, "loss": 0.376, "step": 3461 }, { "epoch": 11.937931034482759, "grad_norm": 1.0831841230392456, "learning_rate": 1.4313103448275863e-05, "loss": 0.3378, "step": 3462 }, { "epoch": 11.941379310344828, "grad_norm": 1.3640245199203491, "learning_rate": 1.4317241379310345e-05, "loss": 0.3636, "step": 3463 }, { "epoch": 11.944827586206896, "grad_norm": 1.0081350803375244, "learning_rate": 1.4321379310344829e-05, "loss": 0.3309, "step": 3464 }, { "epoch": 11.948275862068966, "grad_norm": 3.628736972808838, "learning_rate": 1.4325517241379311e-05, "loss": 0.5007, "step": 3465 }, { "epoch": 11.951724137931034, "grad_norm": 0.9869123101234436, "learning_rate": 1.4329655172413795e-05, "loss": 0.3559, "step": 3466 }, { "epoch": 11.955172413793104, "grad_norm": 0.8269278407096863, "learning_rate": 1.4333793103448277e-05, "loss": 0.3045, "step": 3467 }, { "epoch": 11.958620689655172, "grad_norm": 0.900250256061554, "learning_rate": 1.4337931034482759e-05, "loss": 0.3283, "step": 3468 }, { "epoch": 11.962068965517242, "grad_norm": 1.1945149898529053, "learning_rate": 1.4342068965517241e-05, "loss": 0.3175, "step": 3469 }, { "epoch": 11.96551724137931, "grad_norm": 1.7974390983581543, "learning_rate": 1.4346206896551725e-05, "loss": 0.3526, "step": 3470 }, { "epoch": 11.96896551724138, "grad_norm": 0.7665828466415405, "learning_rate": 1.4350344827586207e-05, "loss": 0.3205, "step": 3471 }, { "epoch": 11.972413793103449, "grad_norm": 0.998196542263031, "learning_rate": 1.435448275862069e-05, "loss": 0.3282, "step": 3472 }, { "epoch": 11.975862068965517, "grad_norm": 0.9325132369995117, "learning_rate": 1.4358620689655172e-05, "loss": 0.3285, "step": 3473 }, { "epoch": 11.979310344827587, "grad_norm": 1.0414823293685913, "learning_rate": 1.4362758620689656e-05, "loss": 0.3234, "step": 3474 }, { "epoch": 11.982758620689655, "grad_norm": 1.008942723274231, "learning_rate": 1.4366896551724138e-05, "loss": 0.2986, "step": 3475 }, { "epoch": 11.986206896551725, "grad_norm": 1.0891664028167725, "learning_rate": 1.4371034482758622e-05, "loss": 0.3277, "step": 3476 }, { "epoch": 11.989655172413793, "grad_norm": 1.457259178161621, "learning_rate": 1.4375172413793104e-05, "loss": 0.3537, "step": 3477 }, { "epoch": 11.993103448275862, "grad_norm": 1.299931526184082, "learning_rate": 1.4379310344827588e-05, "loss": 0.3532, "step": 3478 }, { "epoch": 11.99655172413793, "grad_norm": 1.2344098091125488, "learning_rate": 1.438344827586207e-05, "loss": 0.4021, "step": 3479 }, { "epoch": 12.0, "grad_norm": 5.989770889282227, "learning_rate": 1.4387586206896553e-05, "loss": 0.4801, "step": 3480 }, { "epoch": 12.00344827586207, "grad_norm": 1.26616370677948, "learning_rate": 1.4391724137931034e-05, "loss": 0.3967, "step": 3481 }, { "epoch": 12.006896551724138, "grad_norm": 1.0293315649032593, "learning_rate": 1.4395862068965517e-05, "loss": 0.3653, "step": 3482 }, { "epoch": 12.010344827586207, "grad_norm": 0.8453493714332581, "learning_rate": 1.44e-05, "loss": 0.354, "step": 3483 }, { "epoch": 12.013793103448275, "grad_norm": 0.8915635943412781, "learning_rate": 1.4404137931034483e-05, "loss": 0.349, "step": 3484 }, { "epoch": 12.017241379310345, "grad_norm": 2.7553725242614746, "learning_rate": 1.4408275862068965e-05, "loss": 0.3398, "step": 3485 }, { "epoch": 12.020689655172413, "grad_norm": 0.7960880398750305, "learning_rate": 1.4412413793103449e-05, "loss": 0.3198, "step": 3486 }, { "epoch": 12.024137931034483, "grad_norm": 0.9081913828849792, "learning_rate": 1.441655172413793e-05, "loss": 0.3751, "step": 3487 }, { "epoch": 12.027586206896551, "grad_norm": 0.825488805770874, "learning_rate": 1.4420689655172415e-05, "loss": 0.3506, "step": 3488 }, { "epoch": 12.03103448275862, "grad_norm": 0.9169619679450989, "learning_rate": 1.4424827586206897e-05, "loss": 0.3073, "step": 3489 }, { "epoch": 12.03448275862069, "grad_norm": 0.8366594314575195, "learning_rate": 1.442896551724138e-05, "loss": 0.3116, "step": 3490 }, { "epoch": 12.037931034482758, "grad_norm": 0.8162541389465332, "learning_rate": 1.4433103448275862e-05, "loss": 0.2971, "step": 3491 }, { "epoch": 12.041379310344828, "grad_norm": 1.1038846969604492, "learning_rate": 1.4437241379310346e-05, "loss": 0.3324, "step": 3492 }, { "epoch": 12.044827586206896, "grad_norm": 1.0499742031097412, "learning_rate": 1.4441379310344828e-05, "loss": 0.3269, "step": 3493 }, { "epoch": 12.048275862068966, "grad_norm": 1.3911041021347046, "learning_rate": 1.444551724137931e-05, "loss": 0.3229, "step": 3494 }, { "epoch": 12.051724137931034, "grad_norm": 0.975724458694458, "learning_rate": 1.4449655172413794e-05, "loss": 0.3306, "step": 3495 }, { "epoch": 12.055172413793104, "grad_norm": 0.9177997708320618, "learning_rate": 1.4453793103448276e-05, "loss": 0.3178, "step": 3496 }, { "epoch": 12.058620689655172, "grad_norm": 1.0696169137954712, "learning_rate": 1.445793103448276e-05, "loss": 0.3262, "step": 3497 }, { "epoch": 12.062068965517241, "grad_norm": 0.9868218898773193, "learning_rate": 1.4462068965517241e-05, "loss": 0.3175, "step": 3498 }, { "epoch": 12.065517241379311, "grad_norm": 0.915881335735321, "learning_rate": 1.4466206896551725e-05, "loss": 0.3276, "step": 3499 }, { "epoch": 12.068965517241379, "grad_norm": 1.3078527450561523, "learning_rate": 1.4470344827586207e-05, "loss": 0.317, "step": 3500 }, { "epoch": 12.072413793103449, "grad_norm": 1.0654598474502563, "learning_rate": 1.4474482758620691e-05, "loss": 0.3638, "step": 3501 }, { "epoch": 12.075862068965517, "grad_norm": 1.3730045557022095, "learning_rate": 1.4478620689655173e-05, "loss": 0.3372, "step": 3502 }, { "epoch": 12.079310344827586, "grad_norm": 1.1683003902435303, "learning_rate": 1.4482758620689657e-05, "loss": 0.3761, "step": 3503 }, { "epoch": 12.082758620689654, "grad_norm": 1.2808012962341309, "learning_rate": 1.4486896551724139e-05, "loss": 0.3831, "step": 3504 }, { "epoch": 12.086206896551724, "grad_norm": 4.126859664916992, "learning_rate": 1.4491034482758622e-05, "loss": 0.4631, "step": 3505 }, { "epoch": 12.089655172413794, "grad_norm": 0.6626465320587158, "learning_rate": 1.4495172413793103e-05, "loss": 0.3504, "step": 3506 }, { "epoch": 12.093103448275862, "grad_norm": 0.7144724726676941, "learning_rate": 1.4499310344827586e-05, "loss": 0.3397, "step": 3507 }, { "epoch": 12.096551724137932, "grad_norm": 1.1286081075668335, "learning_rate": 1.4503448275862068e-05, "loss": 0.3705, "step": 3508 }, { "epoch": 12.1, "grad_norm": 0.6770085096359253, "learning_rate": 1.4507586206896552e-05, "loss": 0.3247, "step": 3509 }, { "epoch": 12.10344827586207, "grad_norm": 0.7646756172180176, "learning_rate": 1.4511724137931034e-05, "loss": 0.3629, "step": 3510 }, { "epoch": 12.106896551724137, "grad_norm": 0.9217708706855774, "learning_rate": 1.4515862068965518e-05, "loss": 0.3296, "step": 3511 }, { "epoch": 12.110344827586207, "grad_norm": 0.9644472599029541, "learning_rate": 1.452e-05, "loss": 0.3683, "step": 3512 }, { "epoch": 12.113793103448275, "grad_norm": 1.1339970827102661, "learning_rate": 1.4524137931034484e-05, "loss": 0.3237, "step": 3513 }, { "epoch": 12.117241379310345, "grad_norm": 1.0616182088851929, "learning_rate": 1.4528275862068966e-05, "loss": 0.314, "step": 3514 }, { "epoch": 12.120689655172415, "grad_norm": 1.0387535095214844, "learning_rate": 1.453241379310345e-05, "loss": 0.2959, "step": 3515 }, { "epoch": 12.124137931034483, "grad_norm": 0.8084876537322998, "learning_rate": 1.4536551724137931e-05, "loss": 0.316, "step": 3516 }, { "epoch": 12.127586206896552, "grad_norm": 1.0893546342849731, "learning_rate": 1.4540689655172415e-05, "loss": 0.355, "step": 3517 }, { "epoch": 12.13103448275862, "grad_norm": 0.8252683877944946, "learning_rate": 1.4544827586206897e-05, "loss": 0.3157, "step": 3518 }, { "epoch": 12.13448275862069, "grad_norm": 0.7463306784629822, "learning_rate": 1.4548965517241379e-05, "loss": 0.3162, "step": 3519 }, { "epoch": 12.137931034482758, "grad_norm": 1.194973111152649, "learning_rate": 1.4553103448275861e-05, "loss": 0.3399, "step": 3520 }, { "epoch": 12.141379310344828, "grad_norm": 0.8645808100700378, "learning_rate": 1.4557241379310345e-05, "loss": 0.302, "step": 3521 }, { "epoch": 12.144827586206896, "grad_norm": 0.9885897040367126, "learning_rate": 1.4561379310344827e-05, "loss": 0.3039, "step": 3522 }, { "epoch": 12.148275862068965, "grad_norm": 0.9539966583251953, "learning_rate": 1.456551724137931e-05, "loss": 0.336, "step": 3523 }, { "epoch": 12.151724137931035, "grad_norm": 1.1615605354309082, "learning_rate": 1.4569655172413793e-05, "loss": 0.3175, "step": 3524 }, { "epoch": 12.155172413793103, "grad_norm": 1.4460965394973755, "learning_rate": 1.4573793103448276e-05, "loss": 0.3281, "step": 3525 }, { "epoch": 12.158620689655173, "grad_norm": 1.306809663772583, "learning_rate": 1.4577931034482758e-05, "loss": 0.3912, "step": 3526 }, { "epoch": 12.162068965517241, "grad_norm": 1.321725606918335, "learning_rate": 1.4582068965517242e-05, "loss": 0.3089, "step": 3527 }, { "epoch": 12.16551724137931, "grad_norm": 2.073042631149292, "learning_rate": 1.4586206896551724e-05, "loss": 0.4036, "step": 3528 }, { "epoch": 12.168965517241379, "grad_norm": 1.483200192451477, "learning_rate": 1.4590344827586208e-05, "loss": 0.3874, "step": 3529 }, { "epoch": 12.172413793103448, "grad_norm": 2.117772102355957, "learning_rate": 1.4594482758620691e-05, "loss": 0.5607, "step": 3530 }, { "epoch": 12.175862068965516, "grad_norm": 0.7454078793525696, "learning_rate": 1.4598620689655173e-05, "loss": 0.3595, "step": 3531 }, { "epoch": 12.179310344827586, "grad_norm": 0.7893043160438538, "learning_rate": 1.4602758620689656e-05, "loss": 0.3433, "step": 3532 }, { "epoch": 12.182758620689656, "grad_norm": 0.7041299343109131, "learning_rate": 1.4606896551724138e-05, "loss": 0.347, "step": 3533 }, { "epoch": 12.186206896551724, "grad_norm": 0.65781569480896, "learning_rate": 1.4611034482758621e-05, "loss": 0.2762, "step": 3534 }, { "epoch": 12.189655172413794, "grad_norm": 0.6765103936195374, "learning_rate": 1.4615172413793103e-05, "loss": 0.3195, "step": 3535 }, { "epoch": 12.193103448275862, "grad_norm": 1.1140351295471191, "learning_rate": 1.4619310344827587e-05, "loss": 0.314, "step": 3536 }, { "epoch": 12.196551724137931, "grad_norm": 0.8361272215843201, "learning_rate": 1.4623448275862069e-05, "loss": 0.3131, "step": 3537 }, { "epoch": 12.2, "grad_norm": 0.7788493037223816, "learning_rate": 1.4627586206896553e-05, "loss": 0.3075, "step": 3538 }, { "epoch": 12.203448275862069, "grad_norm": 0.7160586714744568, "learning_rate": 1.4631724137931035e-05, "loss": 0.2871, "step": 3539 }, { "epoch": 12.206896551724139, "grad_norm": 0.8425509333610535, "learning_rate": 1.4635862068965518e-05, "loss": 0.3843, "step": 3540 }, { "epoch": 12.210344827586207, "grad_norm": 1.1866722106933594, "learning_rate": 1.464e-05, "loss": 0.3185, "step": 3541 }, { "epoch": 12.213793103448277, "grad_norm": 1.0145328044891357, "learning_rate": 1.4644137931034484e-05, "loss": 0.3142, "step": 3542 }, { "epoch": 12.217241379310344, "grad_norm": 0.951895534992218, "learning_rate": 1.4648275862068966e-05, "loss": 0.3383, "step": 3543 }, { "epoch": 12.220689655172414, "grad_norm": 1.233513355255127, "learning_rate": 1.465241379310345e-05, "loss": 0.3142, "step": 3544 }, { "epoch": 12.224137931034482, "grad_norm": 0.7840583920478821, "learning_rate": 1.465655172413793e-05, "loss": 0.3306, "step": 3545 }, { "epoch": 12.227586206896552, "grad_norm": 0.7403082251548767, "learning_rate": 1.4660689655172414e-05, "loss": 0.2879, "step": 3546 }, { "epoch": 12.23103448275862, "grad_norm": 0.931374728679657, "learning_rate": 1.4664827586206896e-05, "loss": 0.2841, "step": 3547 }, { "epoch": 12.23448275862069, "grad_norm": 0.9611603617668152, "learning_rate": 1.466896551724138e-05, "loss": 0.31, "step": 3548 }, { "epoch": 12.23793103448276, "grad_norm": 0.7498269081115723, "learning_rate": 1.4673103448275862e-05, "loss": 0.3291, "step": 3549 }, { "epoch": 12.241379310344827, "grad_norm": 0.9600017070770264, "learning_rate": 1.4677241379310345e-05, "loss": 0.3287, "step": 3550 }, { "epoch": 12.244827586206897, "grad_norm": 1.1481947898864746, "learning_rate": 1.4681379310344827e-05, "loss": 0.3512, "step": 3551 }, { "epoch": 12.248275862068965, "grad_norm": 2.0817346572875977, "learning_rate": 1.4685517241379311e-05, "loss": 0.3174, "step": 3552 }, { "epoch": 12.251724137931035, "grad_norm": 1.4262518882751465, "learning_rate": 1.4689655172413793e-05, "loss": 0.3235, "step": 3553 }, { "epoch": 12.255172413793103, "grad_norm": 1.425869107246399, "learning_rate": 1.4693793103448277e-05, "loss": 0.3948, "step": 3554 }, { "epoch": 12.258620689655173, "grad_norm": 1.7058990001678467, "learning_rate": 1.4697931034482759e-05, "loss": 0.4609, "step": 3555 }, { "epoch": 12.26206896551724, "grad_norm": 0.7046235203742981, "learning_rate": 1.4702068965517243e-05, "loss": 0.3665, "step": 3556 }, { "epoch": 12.26551724137931, "grad_norm": 0.7470096945762634, "learning_rate": 1.4706206896551725e-05, "loss": 0.3308, "step": 3557 }, { "epoch": 12.26896551724138, "grad_norm": 0.7032349705696106, "learning_rate": 1.4710344827586207e-05, "loss": 0.3423, "step": 3558 }, { "epoch": 12.272413793103448, "grad_norm": 0.9200426340103149, "learning_rate": 1.4714482758620689e-05, "loss": 0.3527, "step": 3559 }, { "epoch": 12.275862068965518, "grad_norm": 0.8277620077133179, "learning_rate": 1.4718620689655172e-05, "loss": 0.346, "step": 3560 }, { "epoch": 12.279310344827586, "grad_norm": 0.9896325469017029, "learning_rate": 1.4722758620689654e-05, "loss": 0.3089, "step": 3561 }, { "epoch": 12.282758620689656, "grad_norm": 0.7832790613174438, "learning_rate": 1.4726896551724138e-05, "loss": 0.3261, "step": 3562 }, { "epoch": 12.286206896551723, "grad_norm": 0.81923907995224, "learning_rate": 1.4731034482758622e-05, "loss": 0.3402, "step": 3563 }, { "epoch": 12.289655172413793, "grad_norm": 0.7713592648506165, "learning_rate": 1.4735172413793104e-05, "loss": 0.2825, "step": 3564 }, { "epoch": 12.293103448275861, "grad_norm": 0.8611937165260315, "learning_rate": 1.4739310344827588e-05, "loss": 0.3115, "step": 3565 }, { "epoch": 12.296551724137931, "grad_norm": 1.0489156246185303, "learning_rate": 1.474344827586207e-05, "loss": 0.3357, "step": 3566 }, { "epoch": 12.3, "grad_norm": 1.1052449941635132, "learning_rate": 1.4747586206896553e-05, "loss": 0.3092, "step": 3567 }, { "epoch": 12.303448275862069, "grad_norm": 0.8269464373588562, "learning_rate": 1.4751724137931035e-05, "loss": 0.3073, "step": 3568 }, { "epoch": 12.306896551724138, "grad_norm": 0.8508937954902649, "learning_rate": 1.4755862068965519e-05, "loss": 0.3003, "step": 3569 }, { "epoch": 12.310344827586206, "grad_norm": 1.2866920232772827, "learning_rate": 1.4760000000000001e-05, "loss": 0.2872, "step": 3570 }, { "epoch": 12.313793103448276, "grad_norm": 1.0554522275924683, "learning_rate": 1.4764137931034483e-05, "loss": 0.3195, "step": 3571 }, { "epoch": 12.317241379310344, "grad_norm": 1.5492546558380127, "learning_rate": 1.4768275862068965e-05, "loss": 0.3124, "step": 3572 }, { "epoch": 12.320689655172414, "grad_norm": 1.1216623783111572, "learning_rate": 1.4772413793103449e-05, "loss": 0.3317, "step": 3573 }, { "epoch": 12.324137931034482, "grad_norm": 0.9868923425674438, "learning_rate": 1.477655172413793e-05, "loss": 0.327, "step": 3574 }, { "epoch": 12.327586206896552, "grad_norm": 1.3217599391937256, "learning_rate": 1.4780689655172414e-05, "loss": 0.3472, "step": 3575 }, { "epoch": 12.331034482758621, "grad_norm": 1.2261743545532227, "learning_rate": 1.4784827586206896e-05, "loss": 0.357, "step": 3576 }, { "epoch": 12.33448275862069, "grad_norm": 0.9810709357261658, "learning_rate": 1.478896551724138e-05, "loss": 0.3319, "step": 3577 }, { "epoch": 12.337931034482759, "grad_norm": 1.5271424055099487, "learning_rate": 1.4793103448275862e-05, "loss": 0.3681, "step": 3578 }, { "epoch": 12.341379310344827, "grad_norm": 1.4011958837509155, "learning_rate": 1.4797241379310346e-05, "loss": 0.3616, "step": 3579 }, { "epoch": 12.344827586206897, "grad_norm": 1.5752546787261963, "learning_rate": 1.4801379310344828e-05, "loss": 0.4633, "step": 3580 }, { "epoch": 12.348275862068965, "grad_norm": 0.7169761061668396, "learning_rate": 1.4805517241379312e-05, "loss": 0.3713, "step": 3581 }, { "epoch": 12.351724137931035, "grad_norm": 0.6456942558288574, "learning_rate": 1.4809655172413794e-05, "loss": 0.329, "step": 3582 }, { "epoch": 12.355172413793104, "grad_norm": 0.8422977328300476, "learning_rate": 1.4813793103448276e-05, "loss": 0.3012, "step": 3583 }, { "epoch": 12.358620689655172, "grad_norm": 0.9861236214637756, "learning_rate": 1.4817931034482758e-05, "loss": 0.3196, "step": 3584 }, { "epoch": 12.362068965517242, "grad_norm": 0.7838706374168396, "learning_rate": 1.4822068965517241e-05, "loss": 0.3225, "step": 3585 }, { "epoch": 12.36551724137931, "grad_norm": 1.349824070930481, "learning_rate": 1.4826206896551723e-05, "loss": 0.3278, "step": 3586 }, { "epoch": 12.36896551724138, "grad_norm": 0.8191677927970886, "learning_rate": 1.4830344827586207e-05, "loss": 0.3155, "step": 3587 }, { "epoch": 12.372413793103448, "grad_norm": 0.7169542908668518, "learning_rate": 1.483448275862069e-05, "loss": 0.3641, "step": 3588 }, { "epoch": 12.375862068965517, "grad_norm": 0.8974733352661133, "learning_rate": 1.4838620689655173e-05, "loss": 0.3, "step": 3589 }, { "epoch": 12.379310344827585, "grad_norm": 1.1749502420425415, "learning_rate": 1.4842758620689655e-05, "loss": 0.3194, "step": 3590 }, { "epoch": 12.382758620689655, "grad_norm": 0.9658800363540649, "learning_rate": 1.4846896551724139e-05, "loss": 0.3282, "step": 3591 }, { "epoch": 12.386206896551725, "grad_norm": 0.9552044868469238, "learning_rate": 1.485103448275862e-05, "loss": 0.3083, "step": 3592 }, { "epoch": 12.389655172413793, "grad_norm": 1.1060616970062256, "learning_rate": 1.4855172413793104e-05, "loss": 0.332, "step": 3593 }, { "epoch": 12.393103448275863, "grad_norm": 1.0273667573928833, "learning_rate": 1.4859310344827586e-05, "loss": 0.3327, "step": 3594 }, { "epoch": 12.39655172413793, "grad_norm": 1.4240728616714478, "learning_rate": 1.486344827586207e-05, "loss": 0.3644, "step": 3595 }, { "epoch": 12.4, "grad_norm": 1.0024340152740479, "learning_rate": 1.4867586206896552e-05, "loss": 0.3444, "step": 3596 }, { "epoch": 12.403448275862068, "grad_norm": 1.212823748588562, "learning_rate": 1.4871724137931034e-05, "loss": 0.3006, "step": 3597 }, { "epoch": 12.406896551724138, "grad_norm": 1.1098933219909668, "learning_rate": 1.4875862068965518e-05, "loss": 0.3208, "step": 3598 }, { "epoch": 12.410344827586206, "grad_norm": 0.9068141579627991, "learning_rate": 1.488e-05, "loss": 0.3135, "step": 3599 }, { "epoch": 12.413793103448276, "grad_norm": 0.9250069856643677, "learning_rate": 1.4884137931034484e-05, "loss": 0.3232, "step": 3600 }, { "epoch": 12.417241379310346, "grad_norm": 0.9820652604103088, "learning_rate": 1.4888275862068966e-05, "loss": 0.3118, "step": 3601 }, { "epoch": 12.420689655172414, "grad_norm": 1.8442033529281616, "learning_rate": 1.489241379310345e-05, "loss": 0.3122, "step": 3602 }, { "epoch": 12.424137931034483, "grad_norm": 1.6721259355545044, "learning_rate": 1.4896551724137931e-05, "loss": 0.3499, "step": 3603 }, { "epoch": 12.427586206896551, "grad_norm": 1.3997021913528442, "learning_rate": 1.4900689655172415e-05, "loss": 0.3666, "step": 3604 }, { "epoch": 12.431034482758621, "grad_norm": 6.918236255645752, "learning_rate": 1.4904827586206897e-05, "loss": 0.5085, "step": 3605 }, { "epoch": 12.434482758620689, "grad_norm": 0.9461071491241455, "learning_rate": 1.490896551724138e-05, "loss": 0.4297, "step": 3606 }, { "epoch": 12.437931034482759, "grad_norm": 1.148206353187561, "learning_rate": 1.4913103448275863e-05, "loss": 0.3473, "step": 3607 }, { "epoch": 12.441379310344828, "grad_norm": 0.6565497517585754, "learning_rate": 1.4917241379310346e-05, "loss": 0.3441, "step": 3608 }, { "epoch": 12.444827586206896, "grad_norm": 0.7366973757743835, "learning_rate": 1.4921379310344827e-05, "loss": 0.3258, "step": 3609 }, { "epoch": 12.448275862068966, "grad_norm": 0.6692348718643188, "learning_rate": 1.492551724137931e-05, "loss": 0.3249, "step": 3610 }, { "epoch": 12.451724137931034, "grad_norm": 0.6688734889030457, "learning_rate": 1.4929655172413793e-05, "loss": 0.2841, "step": 3611 }, { "epoch": 12.455172413793104, "grad_norm": 0.858232319355011, "learning_rate": 1.4933793103448276e-05, "loss": 0.3095, "step": 3612 }, { "epoch": 12.458620689655172, "grad_norm": 0.7311927676200867, "learning_rate": 1.4937931034482758e-05, "loss": 0.3318, "step": 3613 }, { "epoch": 12.462068965517242, "grad_norm": 1.1725636720657349, "learning_rate": 1.4942068965517242e-05, "loss": 0.3239, "step": 3614 }, { "epoch": 12.46551724137931, "grad_norm": 0.9013190269470215, "learning_rate": 1.4946206896551724e-05, "loss": 0.3285, "step": 3615 }, { "epoch": 12.46896551724138, "grad_norm": 0.891779363155365, "learning_rate": 1.4950344827586208e-05, "loss": 0.3329, "step": 3616 }, { "epoch": 12.472413793103449, "grad_norm": 0.7326419949531555, "learning_rate": 1.495448275862069e-05, "loss": 0.33, "step": 3617 }, { "epoch": 12.475862068965517, "grad_norm": 1.2384555339813232, "learning_rate": 1.4958620689655173e-05, "loss": 0.3138, "step": 3618 }, { "epoch": 12.479310344827587, "grad_norm": 1.063190221786499, "learning_rate": 1.4962758620689655e-05, "loss": 0.3348, "step": 3619 }, { "epoch": 12.482758620689655, "grad_norm": 1.0720176696777344, "learning_rate": 1.496689655172414e-05, "loss": 0.3271, "step": 3620 }, { "epoch": 12.486206896551725, "grad_norm": 0.8125686645507812, "learning_rate": 1.4971034482758621e-05, "loss": 0.3469, "step": 3621 }, { "epoch": 12.489655172413793, "grad_norm": 0.815011203289032, "learning_rate": 1.4975172413793103e-05, "loss": 0.3213, "step": 3622 }, { "epoch": 12.493103448275862, "grad_norm": 1.233644962310791, "learning_rate": 1.4979310344827585e-05, "loss": 0.288, "step": 3623 }, { "epoch": 12.49655172413793, "grad_norm": 1.1503658294677734, "learning_rate": 1.4983448275862069e-05, "loss": 0.3379, "step": 3624 }, { "epoch": 12.5, "grad_norm": 1.5490617752075195, "learning_rate": 1.4987586206896551e-05, "loss": 0.3253, "step": 3625 }, { "epoch": 12.50344827586207, "grad_norm": 5.614068984985352, "learning_rate": 1.4991724137931035e-05, "loss": 0.3241, "step": 3626 }, { "epoch": 12.506896551724138, "grad_norm": 1.4943115711212158, "learning_rate": 1.4995862068965517e-05, "loss": 0.3491, "step": 3627 }, { "epoch": 12.510344827586207, "grad_norm": 1.2406545877456665, "learning_rate": 1.5e-05, "loss": 0.3128, "step": 3628 }, { "epoch": 12.513793103448275, "grad_norm": 1.446350336074829, "learning_rate": 1.5004137931034484e-05, "loss": 0.3449, "step": 3629 }, { "epoch": 12.517241379310345, "grad_norm": 2.5794098377227783, "learning_rate": 1.5008275862068968e-05, "loss": 0.4727, "step": 3630 }, { "epoch": 12.520689655172413, "grad_norm": 1.3914400339126587, "learning_rate": 1.5012413793103448e-05, "loss": 0.3367, "step": 3631 }, { "epoch": 12.524137931034483, "grad_norm": 0.7147465944290161, "learning_rate": 1.5016551724137932e-05, "loss": 0.3297, "step": 3632 }, { "epoch": 12.527586206896551, "grad_norm": 1.0186423063278198, "learning_rate": 1.5020689655172416e-05, "loss": 0.311, "step": 3633 }, { "epoch": 12.53103448275862, "grad_norm": 0.8225456476211548, "learning_rate": 1.50248275862069e-05, "loss": 0.3243, "step": 3634 }, { "epoch": 12.53448275862069, "grad_norm": 0.7307153344154358, "learning_rate": 1.5028965517241378e-05, "loss": 0.3361, "step": 3635 }, { "epoch": 12.537931034482758, "grad_norm": 1.303120732307434, "learning_rate": 1.5033103448275862e-05, "loss": 0.3164, "step": 3636 }, { "epoch": 12.541379310344828, "grad_norm": 1.0102038383483887, "learning_rate": 1.5037241379310345e-05, "loss": 0.3343, "step": 3637 }, { "epoch": 12.544827586206896, "grad_norm": 1.0144498348236084, "learning_rate": 1.5041379310344829e-05, "loss": 0.3171, "step": 3638 }, { "epoch": 12.548275862068966, "grad_norm": 1.2188529968261719, "learning_rate": 1.504551724137931e-05, "loss": 0.3306, "step": 3639 }, { "epoch": 12.551724137931034, "grad_norm": 1.1812376976013184, "learning_rate": 1.5049655172413793e-05, "loss": 0.3027, "step": 3640 }, { "epoch": 12.555172413793104, "grad_norm": 1.5132505893707275, "learning_rate": 1.5053793103448277e-05, "loss": 0.3296, "step": 3641 }, { "epoch": 12.558620689655172, "grad_norm": 0.8100770711898804, "learning_rate": 1.505793103448276e-05, "loss": 0.3048, "step": 3642 }, { "epoch": 12.562068965517241, "grad_norm": 1.0304349660873413, "learning_rate": 1.506206896551724e-05, "loss": 0.3156, "step": 3643 }, { "epoch": 12.565517241379311, "grad_norm": 1.9391705989837646, "learning_rate": 1.5066206896551725e-05, "loss": 0.3448, "step": 3644 }, { "epoch": 12.568965517241379, "grad_norm": 1.2473970651626587, "learning_rate": 1.5070344827586208e-05, "loss": 0.3654, "step": 3645 }, { "epoch": 12.572413793103449, "grad_norm": 1.127953290939331, "learning_rate": 1.5074482758620692e-05, "loss": 0.365, "step": 3646 }, { "epoch": 12.575862068965517, "grad_norm": 1.3756201267242432, "learning_rate": 1.5078620689655172e-05, "loss": 0.3271, "step": 3647 }, { "epoch": 12.579310344827586, "grad_norm": 1.4125492572784424, "learning_rate": 1.5082758620689654e-05, "loss": 0.331, "step": 3648 }, { "epoch": 12.582758620689654, "grad_norm": 0.9906709790229797, "learning_rate": 1.5086896551724138e-05, "loss": 0.324, "step": 3649 }, { "epoch": 12.586206896551724, "grad_norm": 1.1331419944763184, "learning_rate": 1.5091034482758622e-05, "loss": 0.3231, "step": 3650 }, { "epoch": 12.589655172413792, "grad_norm": 0.8947917222976685, "learning_rate": 1.5095172413793102e-05, "loss": 0.373, "step": 3651 }, { "epoch": 12.593103448275862, "grad_norm": 1.3394232988357544, "learning_rate": 1.5099310344827586e-05, "loss": 0.3298, "step": 3652 }, { "epoch": 12.596551724137932, "grad_norm": 1.1162525415420532, "learning_rate": 1.510344827586207e-05, "loss": 0.3533, "step": 3653 }, { "epoch": 12.6, "grad_norm": 1.4165152311325073, "learning_rate": 1.5107586206896553e-05, "loss": 0.3572, "step": 3654 }, { "epoch": 12.60344827586207, "grad_norm": 1.680238127708435, "learning_rate": 1.5111724137931034e-05, "loss": 0.4818, "step": 3655 }, { "epoch": 12.606896551724137, "grad_norm": 1.0560272932052612, "learning_rate": 1.5115862068965517e-05, "loss": 0.3825, "step": 3656 }, { "epoch": 12.610344827586207, "grad_norm": 0.8286451697349548, "learning_rate": 1.5120000000000001e-05, "loss": 0.3097, "step": 3657 }, { "epoch": 12.613793103448275, "grad_norm": 0.6894571185112, "learning_rate": 1.5124137931034485e-05, "loss": 0.3341, "step": 3658 }, { "epoch": 12.617241379310345, "grad_norm": 0.8052505254745483, "learning_rate": 1.5128275862068965e-05, "loss": 0.3152, "step": 3659 }, { "epoch": 12.620689655172415, "grad_norm": 0.6944785118103027, "learning_rate": 1.5132413793103449e-05, "loss": 0.3125, "step": 3660 }, { "epoch": 12.624137931034483, "grad_norm": 1.0126858949661255, "learning_rate": 1.513655172413793e-05, "loss": 0.3066, "step": 3661 }, { "epoch": 12.627586206896552, "grad_norm": 0.8814734220504761, "learning_rate": 1.5140689655172414e-05, "loss": 0.3398, "step": 3662 }, { "epoch": 12.63103448275862, "grad_norm": 1.3391188383102417, "learning_rate": 1.5144827586206898e-05, "loss": 0.3273, "step": 3663 }, { "epoch": 12.63448275862069, "grad_norm": 0.8359869718551636, "learning_rate": 1.5148965517241378e-05, "loss": 0.344, "step": 3664 }, { "epoch": 12.637931034482758, "grad_norm": 0.8233354687690735, "learning_rate": 1.5153103448275862e-05, "loss": 0.3066, "step": 3665 }, { "epoch": 12.641379310344828, "grad_norm": 1.0617337226867676, "learning_rate": 1.5157241379310346e-05, "loss": 0.3551, "step": 3666 }, { "epoch": 12.644827586206896, "grad_norm": 0.7391114830970764, "learning_rate": 1.516137931034483e-05, "loss": 0.3242, "step": 3667 }, { "epoch": 12.648275862068965, "grad_norm": 1.0387837886810303, "learning_rate": 1.516551724137931e-05, "loss": 0.3214, "step": 3668 }, { "epoch": 12.651724137931035, "grad_norm": 0.8466061949729919, "learning_rate": 1.5169655172413794e-05, "loss": 0.2945, "step": 3669 }, { "epoch": 12.655172413793103, "grad_norm": 0.7936972379684448, "learning_rate": 1.5173793103448277e-05, "loss": 0.302, "step": 3670 }, { "epoch": 12.658620689655173, "grad_norm": 1.2174344062805176, "learning_rate": 1.5177931034482761e-05, "loss": 0.342, "step": 3671 }, { "epoch": 12.662068965517241, "grad_norm": 1.54228675365448, "learning_rate": 1.5182068965517241e-05, "loss": 0.3418, "step": 3672 }, { "epoch": 12.66551724137931, "grad_norm": 1.1938979625701904, "learning_rate": 1.5186206896551725e-05, "loss": 0.3232, "step": 3673 }, { "epoch": 12.668965517241379, "grad_norm": 1.1790108680725098, "learning_rate": 1.5190344827586207e-05, "loss": 0.3291, "step": 3674 }, { "epoch": 12.672413793103448, "grad_norm": 2.0230326652526855, "learning_rate": 1.519448275862069e-05, "loss": 0.335, "step": 3675 }, { "epoch": 12.675862068965518, "grad_norm": 2.023991823196411, "learning_rate": 1.5198620689655171e-05, "loss": 0.3285, "step": 3676 }, { "epoch": 12.679310344827586, "grad_norm": 1.0920225381851196, "learning_rate": 1.5202758620689655e-05, "loss": 0.295, "step": 3677 }, { "epoch": 12.682758620689656, "grad_norm": 1.08978271484375, "learning_rate": 1.5206896551724139e-05, "loss": 0.3305, "step": 3678 }, { "epoch": 12.686206896551724, "grad_norm": 1.7910391092300415, "learning_rate": 1.5211034482758622e-05, "loss": 0.4087, "step": 3679 }, { "epoch": 12.689655172413794, "grad_norm": 3.7692337036132812, "learning_rate": 1.5215172413793103e-05, "loss": 0.4581, "step": 3680 }, { "epoch": 12.693103448275862, "grad_norm": 0.8885387778282166, "learning_rate": 1.5219310344827586e-05, "loss": 0.3822, "step": 3681 }, { "epoch": 12.696551724137931, "grad_norm": 0.939327597618103, "learning_rate": 1.522344827586207e-05, "loss": 0.3383, "step": 3682 }, { "epoch": 12.7, "grad_norm": 0.6421239376068115, "learning_rate": 1.5227586206896554e-05, "loss": 0.3586, "step": 3683 }, { "epoch": 12.703448275862069, "grad_norm": 0.9056881666183472, "learning_rate": 1.5231724137931034e-05, "loss": 0.3344, "step": 3684 }, { "epoch": 12.706896551724139, "grad_norm": 0.7552617192268372, "learning_rate": 1.5235862068965518e-05, "loss": 0.3497, "step": 3685 }, { "epoch": 12.710344827586207, "grad_norm": 0.7182134389877319, "learning_rate": 1.524e-05, "loss": 0.3009, "step": 3686 }, { "epoch": 12.713793103448277, "grad_norm": 0.836024820804596, "learning_rate": 1.5244137931034483e-05, "loss": 0.3423, "step": 3687 }, { "epoch": 12.717241379310344, "grad_norm": 1.2786785364151, "learning_rate": 1.5248275862068964e-05, "loss": 0.3229, "step": 3688 }, { "epoch": 12.720689655172414, "grad_norm": 0.6914835572242737, "learning_rate": 1.5252413793103448e-05, "loss": 0.294, "step": 3689 }, { "epoch": 12.724137931034482, "grad_norm": 0.8961486220359802, "learning_rate": 1.5256551724137931e-05, "loss": 0.3208, "step": 3690 }, { "epoch": 12.727586206896552, "grad_norm": 1.0678987503051758, "learning_rate": 1.5260689655172417e-05, "loss": 0.3178, "step": 3691 }, { "epoch": 12.73103448275862, "grad_norm": 0.8217285871505737, "learning_rate": 1.5264827586206895e-05, "loss": 0.3521, "step": 3692 }, { "epoch": 12.73448275862069, "grad_norm": 0.8688945770263672, "learning_rate": 1.5268965517241377e-05, "loss": 0.2971, "step": 3693 }, { "epoch": 12.73793103448276, "grad_norm": 1.4245789051055908, "learning_rate": 1.5273103448275863e-05, "loss": 0.3275, "step": 3694 }, { "epoch": 12.741379310344827, "grad_norm": 1.1915968656539917, "learning_rate": 1.5277241379310345e-05, "loss": 0.3838, "step": 3695 }, { "epoch": 12.744827586206897, "grad_norm": 1.2404510974884033, "learning_rate": 1.528137931034483e-05, "loss": 0.3202, "step": 3696 }, { "epoch": 12.748275862068965, "grad_norm": 1.1115152835845947, "learning_rate": 1.528551724137931e-05, "loss": 0.3177, "step": 3697 }, { "epoch": 12.751724137931035, "grad_norm": 0.8763297200202942, "learning_rate": 1.5289655172413794e-05, "loss": 0.3665, "step": 3698 }, { "epoch": 12.755172413793103, "grad_norm": 0.9152575731277466, "learning_rate": 1.5293793103448276e-05, "loss": 0.3561, "step": 3699 }, { "epoch": 12.758620689655173, "grad_norm": 0.9045568108558655, "learning_rate": 1.529793103448276e-05, "loss": 0.3052, "step": 3700 }, { "epoch": 12.76206896551724, "grad_norm": 1.2721054553985596, "learning_rate": 1.530206896551724e-05, "loss": 0.3707, "step": 3701 }, { "epoch": 12.76551724137931, "grad_norm": 2.2137105464935303, "learning_rate": 1.5306206896551726e-05, "loss": 0.3566, "step": 3702 }, { "epoch": 12.76896551724138, "grad_norm": 1.6120741367340088, "learning_rate": 1.5310344827586208e-05, "loss": 0.4115, "step": 3703 }, { "epoch": 12.772413793103448, "grad_norm": 1.4651191234588623, "learning_rate": 1.5314482758620693e-05, "loss": 0.3281, "step": 3704 }, { "epoch": 12.775862068965518, "grad_norm": 1.6597087383270264, "learning_rate": 1.531862068965517e-05, "loss": 0.5153, "step": 3705 }, { "epoch": 12.779310344827586, "grad_norm": 0.916081964969635, "learning_rate": 1.5322758620689654e-05, "loss": 0.3921, "step": 3706 }, { "epoch": 12.782758620689656, "grad_norm": 0.6546891331672668, "learning_rate": 1.532689655172414e-05, "loss": 0.3363, "step": 3707 }, { "epoch": 12.786206896551723, "grad_norm": 0.7866283059120178, "learning_rate": 1.533103448275862e-05, "loss": 0.3381, "step": 3708 }, { "epoch": 12.789655172413793, "grad_norm": 0.7450565695762634, "learning_rate": 1.5335172413793103e-05, "loss": 0.3491, "step": 3709 }, { "epoch": 12.793103448275861, "grad_norm": 0.8908780813217163, "learning_rate": 1.5339310344827585e-05, "loss": 0.2991, "step": 3710 }, { "epoch": 12.796551724137931, "grad_norm": 0.6765058040618896, "learning_rate": 1.534344827586207e-05, "loss": 0.3026, "step": 3711 }, { "epoch": 12.8, "grad_norm": 1.0291775465011597, "learning_rate": 1.5347586206896553e-05, "loss": 0.301, "step": 3712 }, { "epoch": 12.803448275862069, "grad_norm": 0.7201748490333557, "learning_rate": 1.5351724137931035e-05, "loss": 0.3275, "step": 3713 }, { "epoch": 12.806896551724138, "grad_norm": 0.8661954998970032, "learning_rate": 1.5355862068965517e-05, "loss": 0.329, "step": 3714 }, { "epoch": 12.810344827586206, "grad_norm": 0.6648703217506409, "learning_rate": 1.5360000000000002e-05, "loss": 0.3139, "step": 3715 }, { "epoch": 12.813793103448276, "grad_norm": 0.77756267786026, "learning_rate": 1.5364137931034484e-05, "loss": 0.3118, "step": 3716 }, { "epoch": 12.817241379310344, "grad_norm": 0.6564409136772156, "learning_rate": 1.5368275862068966e-05, "loss": 0.3109, "step": 3717 }, { "epoch": 12.820689655172414, "grad_norm": 1.0817925930023193, "learning_rate": 1.5372413793103448e-05, "loss": 0.3455, "step": 3718 }, { "epoch": 12.824137931034482, "grad_norm": 0.7865199446678162, "learning_rate": 1.537655172413793e-05, "loss": 0.3178, "step": 3719 }, { "epoch": 12.827586206896552, "grad_norm": 0.8471601009368896, "learning_rate": 1.5380689655172415e-05, "loss": 0.3327, "step": 3720 }, { "epoch": 12.831034482758621, "grad_norm": 0.9662609696388245, "learning_rate": 1.5384827586206894e-05, "loss": 0.357, "step": 3721 }, { "epoch": 12.83448275862069, "grad_norm": 0.8669106960296631, "learning_rate": 1.538896551724138e-05, "loss": 0.3056, "step": 3722 }, { "epoch": 12.837931034482759, "grad_norm": 0.7896295785903931, "learning_rate": 1.539310344827586e-05, "loss": 0.335, "step": 3723 }, { "epoch": 12.841379310344827, "grad_norm": 1.093196153640747, "learning_rate": 1.5397241379310347e-05, "loss": 0.3, "step": 3724 }, { "epoch": 12.844827586206897, "grad_norm": 1.039528489112854, "learning_rate": 1.5401379310344826e-05, "loss": 0.3331, "step": 3725 }, { "epoch": 12.848275862068965, "grad_norm": 1.1100356578826904, "learning_rate": 1.540551724137931e-05, "loss": 0.3194, "step": 3726 }, { "epoch": 12.851724137931035, "grad_norm": 0.9298973679542542, "learning_rate": 1.5409655172413793e-05, "loss": 0.3294, "step": 3727 }, { "epoch": 12.855172413793104, "grad_norm": 1.579939365386963, "learning_rate": 1.541379310344828e-05, "loss": 0.3173, "step": 3728 }, { "epoch": 12.858620689655172, "grad_norm": 1.4766618013381958, "learning_rate": 1.541793103448276e-05, "loss": 0.3803, "step": 3729 }, { "epoch": 12.862068965517242, "grad_norm": 3.5092458724975586, "learning_rate": 1.5422068965517242e-05, "loss": 0.4767, "step": 3730 }, { "epoch": 12.86551724137931, "grad_norm": 1.629119873046875, "learning_rate": 1.5426206896551724e-05, "loss": 0.3803, "step": 3731 }, { "epoch": 12.86896551724138, "grad_norm": 0.7670642137527466, "learning_rate": 1.5430344827586206e-05, "loss": 0.3669, "step": 3732 }, { "epoch": 12.872413793103448, "grad_norm": 0.7892146706581116, "learning_rate": 1.5434482758620692e-05, "loss": 0.3128, "step": 3733 }, { "epoch": 12.875862068965517, "grad_norm": 0.9369604587554932, "learning_rate": 1.543862068965517e-05, "loss": 0.3856, "step": 3734 }, { "epoch": 12.879310344827585, "grad_norm": 0.9546313881874084, "learning_rate": 1.5442758620689656e-05, "loss": 0.345, "step": 3735 }, { "epoch": 12.882758620689655, "grad_norm": 0.7184842228889465, "learning_rate": 1.5446896551724138e-05, "loss": 0.348, "step": 3736 }, { "epoch": 12.886206896551725, "grad_norm": 0.884932279586792, "learning_rate": 1.5451034482758623e-05, "loss": 0.3093, "step": 3737 }, { "epoch": 12.889655172413793, "grad_norm": 0.8054507970809937, "learning_rate": 1.5455172413793102e-05, "loss": 0.3237, "step": 3738 }, { "epoch": 12.893103448275863, "grad_norm": 0.8238664865493774, "learning_rate": 1.5459310344827587e-05, "loss": 0.3191, "step": 3739 }, { "epoch": 12.89655172413793, "grad_norm": 1.1684670448303223, "learning_rate": 1.546344827586207e-05, "loss": 0.2956, "step": 3740 }, { "epoch": 12.9, "grad_norm": 0.8581128120422363, "learning_rate": 1.5467586206896555e-05, "loss": 0.2983, "step": 3741 }, { "epoch": 12.903448275862068, "grad_norm": 0.9454929828643799, "learning_rate": 1.5471724137931033e-05, "loss": 0.2951, "step": 3742 }, { "epoch": 12.906896551724138, "grad_norm": 0.7948169708251953, "learning_rate": 1.547586206896552e-05, "loss": 0.2978, "step": 3743 }, { "epoch": 12.910344827586208, "grad_norm": 1.0704249143600464, "learning_rate": 1.548e-05, "loss": 0.2952, "step": 3744 }, { "epoch": 12.913793103448276, "grad_norm": 1.3219411373138428, "learning_rate": 1.5484137931034483e-05, "loss": 0.303, "step": 3745 }, { "epoch": 12.917241379310346, "grad_norm": 0.9928703308105469, "learning_rate": 1.5488275862068965e-05, "loss": 0.3017, "step": 3746 }, { "epoch": 12.920689655172414, "grad_norm": 0.9065254926681519, "learning_rate": 1.5492413793103447e-05, "loss": 0.3275, "step": 3747 }, { "epoch": 12.924137931034483, "grad_norm": 0.9592154622077942, "learning_rate": 1.5496551724137932e-05, "loss": 0.3299, "step": 3748 }, { "epoch": 12.927586206896551, "grad_norm": 0.8697330951690674, "learning_rate": 1.5500689655172414e-05, "loss": 0.3278, "step": 3749 }, { "epoch": 12.931034482758621, "grad_norm": 0.9528943300247192, "learning_rate": 1.5504827586206896e-05, "loss": 0.3313, "step": 3750 }, { "epoch": 12.934482758620689, "grad_norm": 0.9696260094642639, "learning_rate": 1.550896551724138e-05, "loss": 0.3413, "step": 3751 }, { "epoch": 12.937931034482759, "grad_norm": 0.9237114191055298, "learning_rate": 1.5513103448275864e-05, "loss": 0.3394, "step": 3752 }, { "epoch": 12.941379310344828, "grad_norm": 2.269866704940796, "learning_rate": 1.5517241379310346e-05, "loss": 0.3165, "step": 3753 }, { "epoch": 12.944827586206896, "grad_norm": 1.1805471181869507, "learning_rate": 1.5521379310344828e-05, "loss": 0.414, "step": 3754 }, { "epoch": 12.948275862068966, "grad_norm": 2.909184455871582, "learning_rate": 1.552551724137931e-05, "loss": 0.5351, "step": 3755 }, { "epoch": 12.951724137931034, "grad_norm": 0.7028511762619019, "learning_rate": 1.5529655172413795e-05, "loss": 0.3543, "step": 3756 }, { "epoch": 12.955172413793104, "grad_norm": 1.1513944864273071, "learning_rate": 1.5533793103448277e-05, "loss": 0.3484, "step": 3757 }, { "epoch": 12.958620689655172, "grad_norm": 0.6545307040214539, "learning_rate": 1.5537931034482756e-05, "loss": 0.3058, "step": 3758 }, { "epoch": 12.962068965517242, "grad_norm": 0.7636549472808838, "learning_rate": 1.554206896551724e-05, "loss": 0.3151, "step": 3759 }, { "epoch": 12.96551724137931, "grad_norm": 1.0811820030212402, "learning_rate": 1.5546206896551723e-05, "loss": 0.298, "step": 3760 }, { "epoch": 12.96896551724138, "grad_norm": 0.7579725384712219, "learning_rate": 1.555034482758621e-05, "loss": 0.2928, "step": 3761 }, { "epoch": 12.972413793103449, "grad_norm": 0.9077142477035522, "learning_rate": 1.555448275862069e-05, "loss": 0.3133, "step": 3762 }, { "epoch": 12.975862068965517, "grad_norm": 1.202742576599121, "learning_rate": 1.5558620689655173e-05, "loss": 0.2856, "step": 3763 }, { "epoch": 12.979310344827587, "grad_norm": 0.9446380138397217, "learning_rate": 1.5562758620689655e-05, "loss": 0.2961, "step": 3764 }, { "epoch": 12.982758620689655, "grad_norm": 1.159485101699829, "learning_rate": 1.556689655172414e-05, "loss": 0.3054, "step": 3765 }, { "epoch": 12.986206896551725, "grad_norm": 1.3973373174667358, "learning_rate": 1.5571034482758622e-05, "loss": 0.2978, "step": 3766 }, { "epoch": 12.989655172413793, "grad_norm": 1.1910338401794434, "learning_rate": 1.5575172413793104e-05, "loss": 0.3223, "step": 3767 }, { "epoch": 12.993103448275862, "grad_norm": 1.5879347324371338, "learning_rate": 1.5579310344827586e-05, "loss": 0.3206, "step": 3768 }, { "epoch": 12.99655172413793, "grad_norm": 1.1463313102722168, "learning_rate": 1.558344827586207e-05, "loss": 0.3538, "step": 3769 }, { "epoch": 13.0, "grad_norm": 1.8211699724197388, "learning_rate": 1.5587586206896554e-05, "loss": 0.4398, "step": 3770 }, { "epoch": 13.00344827586207, "grad_norm": 0.7863640785217285, "learning_rate": 1.5591724137931032e-05, "loss": 0.331, "step": 3771 }, { "epoch": 13.006896551724138, "grad_norm": 0.6901610493659973, "learning_rate": 1.5595862068965518e-05, "loss": 0.3181, "step": 3772 }, { "epoch": 13.010344827586207, "grad_norm": 1.4481803178787231, "learning_rate": 1.56e-05, "loss": 0.321, "step": 3773 }, { "epoch": 13.013793103448275, "grad_norm": 1.054774284362793, "learning_rate": 1.5604137931034485e-05, "loss": 0.3121, "step": 3774 }, { "epoch": 13.017241379310345, "grad_norm": 0.7607391476631165, "learning_rate": 1.5608275862068964e-05, "loss": 0.3021, "step": 3775 }, { "epoch": 13.020689655172413, "grad_norm": 0.786715030670166, "learning_rate": 1.561241379310345e-05, "loss": 0.2872, "step": 3776 }, { "epoch": 13.024137931034483, "grad_norm": 0.9864574670791626, "learning_rate": 1.561655172413793e-05, "loss": 0.311, "step": 3777 }, { "epoch": 13.027586206896551, "grad_norm": 0.8205450773239136, "learning_rate": 1.5620689655172417e-05, "loss": 0.3199, "step": 3778 }, { "epoch": 13.03103448275862, "grad_norm": 0.8760359883308411, "learning_rate": 1.5624827586206895e-05, "loss": 0.2905, "step": 3779 }, { "epoch": 13.03448275862069, "grad_norm": 1.5282751321792603, "learning_rate": 1.562896551724138e-05, "loss": 0.3211, "step": 3780 }, { "epoch": 13.037931034482758, "grad_norm": 0.8742828369140625, "learning_rate": 1.5633103448275863e-05, "loss": 0.2799, "step": 3781 }, { "epoch": 13.041379310344828, "grad_norm": 0.9447338581085205, "learning_rate": 1.5637241379310348e-05, "loss": 0.3029, "step": 3782 }, { "epoch": 13.044827586206896, "grad_norm": 0.8486144542694092, "learning_rate": 1.5641379310344827e-05, "loss": 0.2854, "step": 3783 }, { "epoch": 13.048275862068966, "grad_norm": 2.028942108154297, "learning_rate": 1.564551724137931e-05, "loss": 0.3353, "step": 3784 }, { "epoch": 13.051724137931034, "grad_norm": 0.8678660988807678, "learning_rate": 1.5649655172413794e-05, "loss": 0.3397, "step": 3785 }, { "epoch": 13.055172413793104, "grad_norm": 1.2652668952941895, "learning_rate": 1.5653793103448276e-05, "loss": 0.2982, "step": 3786 }, { "epoch": 13.058620689655172, "grad_norm": 1.1491369009017944, "learning_rate": 1.5657931034482758e-05, "loss": 0.3212, "step": 3787 }, { "epoch": 13.062068965517241, "grad_norm": 1.0815813541412354, "learning_rate": 1.566206896551724e-05, "loss": 0.3072, "step": 3788 }, { "epoch": 13.065517241379311, "grad_norm": 1.2780815362930298, "learning_rate": 1.5666206896551726e-05, "loss": 0.301, "step": 3789 }, { "epoch": 13.068965517241379, "grad_norm": 2.423398733139038, "learning_rate": 1.5670344827586208e-05, "loss": 0.2826, "step": 3790 }, { "epoch": 13.072413793103449, "grad_norm": 2.2050092220306396, "learning_rate": 1.567448275862069e-05, "loss": 0.336, "step": 3791 }, { "epoch": 13.075862068965517, "grad_norm": 0.9455758333206177, "learning_rate": 1.567862068965517e-05, "loss": 0.3496, "step": 3792 }, { "epoch": 13.079310344827586, "grad_norm": 1.2237825393676758, "learning_rate": 1.5682758620689657e-05, "loss": 0.3348, "step": 3793 }, { "epoch": 13.082758620689654, "grad_norm": 1.5156745910644531, "learning_rate": 1.568689655172414e-05, "loss": 0.3908, "step": 3794 }, { "epoch": 13.086206896551724, "grad_norm": 2.0088231563568115, "learning_rate": 1.569103448275862e-05, "loss": 0.4921, "step": 3795 }, { "epoch": 13.089655172413794, "grad_norm": 0.99468994140625, "learning_rate": 1.5695172413793103e-05, "loss": 0.358, "step": 3796 }, { "epoch": 13.093103448275862, "grad_norm": 0.6456352472305298, "learning_rate": 1.5699310344827585e-05, "loss": 0.3276, "step": 3797 }, { "epoch": 13.096551724137932, "grad_norm": 0.6860771179199219, "learning_rate": 1.570344827586207e-05, "loss": 0.3185, "step": 3798 }, { "epoch": 13.1, "grad_norm": 0.6688587069511414, "learning_rate": 1.5707586206896553e-05, "loss": 0.3139, "step": 3799 }, { "epoch": 13.10344827586207, "grad_norm": 0.7911736369132996, "learning_rate": 1.5711724137931035e-05, "loss": 0.3661, "step": 3800 }, { "epoch": 13.106896551724137, "grad_norm": 0.5489144921302795, "learning_rate": 1.5715862068965517e-05, "loss": 0.3036, "step": 3801 }, { "epoch": 13.110344827586207, "grad_norm": 0.8429774641990662, "learning_rate": 1.5720000000000002e-05, "loss": 0.2982, "step": 3802 }, { "epoch": 13.113793103448275, "grad_norm": 0.73917156457901, "learning_rate": 1.5724137931034484e-05, "loss": 0.3019, "step": 3803 }, { "epoch": 13.117241379310345, "grad_norm": 0.9173564314842224, "learning_rate": 1.5728275862068966e-05, "loss": 0.2966, "step": 3804 }, { "epoch": 13.120689655172415, "grad_norm": 0.9526000022888184, "learning_rate": 1.5732413793103448e-05, "loss": 0.281, "step": 3805 }, { "epoch": 13.124137931034483, "grad_norm": 0.8257577419281006, "learning_rate": 1.5736551724137933e-05, "loss": 0.2885, "step": 3806 }, { "epoch": 13.127586206896552, "grad_norm": 0.8123180866241455, "learning_rate": 1.5740689655172415e-05, "loss": 0.3072, "step": 3807 }, { "epoch": 13.13103448275862, "grad_norm": 0.8739138841629028, "learning_rate": 1.5744827586206897e-05, "loss": 0.3337, "step": 3808 }, { "epoch": 13.13448275862069, "grad_norm": 1.2144919633865356, "learning_rate": 1.574896551724138e-05, "loss": 0.295, "step": 3809 }, { "epoch": 13.137931034482758, "grad_norm": 0.924625039100647, "learning_rate": 1.575310344827586e-05, "loss": 0.3137, "step": 3810 }, { "epoch": 13.141379310344828, "grad_norm": 0.813733696937561, "learning_rate": 1.5757241379310347e-05, "loss": 0.3203, "step": 3811 }, { "epoch": 13.144827586206896, "grad_norm": 0.8716703653335571, "learning_rate": 1.5761379310344826e-05, "loss": 0.314, "step": 3812 }, { "epoch": 13.148275862068965, "grad_norm": 0.7718378305435181, "learning_rate": 1.576551724137931e-05, "loss": 0.2837, "step": 3813 }, { "epoch": 13.151724137931035, "grad_norm": 1.5247361660003662, "learning_rate": 1.5769655172413793e-05, "loss": 0.317, "step": 3814 }, { "epoch": 13.155172413793103, "grad_norm": 2.2764716148376465, "learning_rate": 1.577379310344828e-05, "loss": 0.3049, "step": 3815 }, { "epoch": 13.158620689655173, "grad_norm": 1.1534132957458496, "learning_rate": 1.5777931034482757e-05, "loss": 0.3091, "step": 3816 }, { "epoch": 13.162068965517241, "grad_norm": 1.793583631515503, "learning_rate": 1.5782068965517242e-05, "loss": 0.327, "step": 3817 }, { "epoch": 13.16551724137931, "grad_norm": 2.0863847732543945, "learning_rate": 1.5786206896551724e-05, "loss": 0.3599, "step": 3818 }, { "epoch": 13.168965517241379, "grad_norm": 2.988018274307251, "learning_rate": 1.579034482758621e-05, "loss": 0.3304, "step": 3819 }, { "epoch": 13.172413793103448, "grad_norm": 2.27840518951416, "learning_rate": 1.579448275862069e-05, "loss": 0.4919, "step": 3820 }, { "epoch": 13.175862068965516, "grad_norm": 1.090630292892456, "learning_rate": 1.579862068965517e-05, "loss": 0.3964, "step": 3821 }, { "epoch": 13.179310344827586, "grad_norm": 0.6671313643455505, "learning_rate": 1.5802758620689656e-05, "loss": 0.3142, "step": 3822 }, { "epoch": 13.182758620689656, "grad_norm": 0.6990945935249329, "learning_rate": 1.5806896551724138e-05, "loss": 0.3216, "step": 3823 }, { "epoch": 13.186206896551724, "grad_norm": 0.8825490474700928, "learning_rate": 1.581103448275862e-05, "loss": 0.3208, "step": 3824 }, { "epoch": 13.189655172413794, "grad_norm": 0.9292643070220947, "learning_rate": 1.5815172413793102e-05, "loss": 0.3165, "step": 3825 }, { "epoch": 13.193103448275862, "grad_norm": 0.8393388390541077, "learning_rate": 1.5819310344827587e-05, "loss": 0.2953, "step": 3826 }, { "epoch": 13.196551724137931, "grad_norm": 1.001047134399414, "learning_rate": 1.582344827586207e-05, "loss": 0.3506, "step": 3827 }, { "epoch": 13.2, "grad_norm": 0.737598717212677, "learning_rate": 1.5827586206896555e-05, "loss": 0.3257, "step": 3828 }, { "epoch": 13.203448275862069, "grad_norm": 0.755183756351471, "learning_rate": 1.5831724137931033e-05, "loss": 0.3017, "step": 3829 }, { "epoch": 13.206896551724139, "grad_norm": 0.6479543447494507, "learning_rate": 1.583586206896552e-05, "loss": 0.2833, "step": 3830 }, { "epoch": 13.210344827586207, "grad_norm": 0.7679268717765808, "learning_rate": 1.584e-05, "loss": 0.3151, "step": 3831 }, { "epoch": 13.213793103448277, "grad_norm": 1.0979818105697632, "learning_rate": 1.5844137931034486e-05, "loss": 0.3238, "step": 3832 }, { "epoch": 13.217241379310344, "grad_norm": 0.7465900778770447, "learning_rate": 1.5848275862068965e-05, "loss": 0.2998, "step": 3833 }, { "epoch": 13.220689655172414, "grad_norm": 1.0296040773391724, "learning_rate": 1.5852413793103447e-05, "loss": 0.2762, "step": 3834 }, { "epoch": 13.224137931034482, "grad_norm": 1.5726326704025269, "learning_rate": 1.5856551724137932e-05, "loss": 0.28, "step": 3835 }, { "epoch": 13.227586206896552, "grad_norm": 1.0963038206100464, "learning_rate": 1.5860689655172414e-05, "loss": 0.3098, "step": 3836 }, { "epoch": 13.23103448275862, "grad_norm": 1.0044384002685547, "learning_rate": 1.5864827586206896e-05, "loss": 0.2837, "step": 3837 }, { "epoch": 13.23448275862069, "grad_norm": 1.0235726833343506, "learning_rate": 1.586896551724138e-05, "loss": 0.3127, "step": 3838 }, { "epoch": 13.23793103448276, "grad_norm": 0.9474796652793884, "learning_rate": 1.5873103448275864e-05, "loss": 0.2832, "step": 3839 }, { "epoch": 13.241379310344827, "grad_norm": 1.4026682376861572, "learning_rate": 1.5877241379310346e-05, "loss": 0.3397, "step": 3840 }, { "epoch": 13.244827586206897, "grad_norm": 1.1494042873382568, "learning_rate": 1.5881379310344828e-05, "loss": 0.3088, "step": 3841 }, { "epoch": 13.248275862068965, "grad_norm": 1.6348166465759277, "learning_rate": 1.588551724137931e-05, "loss": 0.3196, "step": 3842 }, { "epoch": 13.251724137931035, "grad_norm": 2.0501697063446045, "learning_rate": 1.5889655172413795e-05, "loss": 0.3505, "step": 3843 }, { "epoch": 13.255172413793103, "grad_norm": 1.1253981590270996, "learning_rate": 1.5893793103448277e-05, "loss": 0.3228, "step": 3844 }, { "epoch": 13.258620689655173, "grad_norm": 4.988155364990234, "learning_rate": 1.589793103448276e-05, "loss": 0.4476, "step": 3845 }, { "epoch": 13.26206896551724, "grad_norm": 0.8632540702819824, "learning_rate": 1.590206896551724e-05, "loss": 0.3705, "step": 3846 }, { "epoch": 13.26551724137931, "grad_norm": 0.5732505321502686, "learning_rate": 1.5906206896551723e-05, "loss": 0.2921, "step": 3847 }, { "epoch": 13.26896551724138, "grad_norm": 1.0180329084396362, "learning_rate": 1.591034482758621e-05, "loss": 0.2888, "step": 3848 }, { "epoch": 13.272413793103448, "grad_norm": 1.020774245262146, "learning_rate": 1.5914482758620687e-05, "loss": 0.3309, "step": 3849 }, { "epoch": 13.275862068965518, "grad_norm": 0.8605738282203674, "learning_rate": 1.5918620689655173e-05, "loss": 0.3212, "step": 3850 }, { "epoch": 13.279310344827586, "grad_norm": 0.7603893876075745, "learning_rate": 1.5922758620689655e-05, "loss": 0.3055, "step": 3851 }, { "epoch": 13.282758620689656, "grad_norm": 0.675955593585968, "learning_rate": 1.592689655172414e-05, "loss": 0.3205, "step": 3852 }, { "epoch": 13.286206896551723, "grad_norm": 1.2262334823608398, "learning_rate": 1.593103448275862e-05, "loss": 0.3484, "step": 3853 }, { "epoch": 13.289655172413793, "grad_norm": 0.8649576902389526, "learning_rate": 1.5935172413793104e-05, "loss": 0.2908, "step": 3854 }, { "epoch": 13.293103448275861, "grad_norm": 0.7772523164749146, "learning_rate": 1.5939310344827586e-05, "loss": 0.304, "step": 3855 }, { "epoch": 13.296551724137931, "grad_norm": 1.1317411661148071, "learning_rate": 1.594344827586207e-05, "loss": 0.3292, "step": 3856 }, { "epoch": 13.3, "grad_norm": 0.7499098181724548, "learning_rate": 1.594758620689655e-05, "loss": 0.3089, "step": 3857 }, { "epoch": 13.303448275862069, "grad_norm": 0.9406294822692871, "learning_rate": 1.5951724137931036e-05, "loss": 0.3259, "step": 3858 }, { "epoch": 13.306896551724138, "grad_norm": 0.8141098618507385, "learning_rate": 1.5955862068965518e-05, "loss": 0.3088, "step": 3859 }, { "epoch": 13.310344827586206, "grad_norm": 1.1436827182769775, "learning_rate": 1.596e-05, "loss": 0.3049, "step": 3860 }, { "epoch": 13.313793103448276, "grad_norm": 0.8027977347373962, "learning_rate": 1.5964137931034485e-05, "loss": 0.322, "step": 3861 }, { "epoch": 13.317241379310344, "grad_norm": 1.0589709281921387, "learning_rate": 1.5968275862068964e-05, "loss": 0.3325, "step": 3862 }, { "epoch": 13.320689655172414, "grad_norm": 0.8472928404808044, "learning_rate": 1.597241379310345e-05, "loss": 0.305, "step": 3863 }, { "epoch": 13.324137931034482, "grad_norm": 0.9896323680877686, "learning_rate": 1.597655172413793e-05, "loss": 0.3344, "step": 3864 }, { "epoch": 13.327586206896552, "grad_norm": 1.125645637512207, "learning_rate": 1.5980689655172417e-05, "loss": 0.2949, "step": 3865 }, { "epoch": 13.331034482758621, "grad_norm": 0.8921010494232178, "learning_rate": 1.5984827586206895e-05, "loss": 0.3557, "step": 3866 }, { "epoch": 13.33448275862069, "grad_norm": 0.9513311386108398, "learning_rate": 1.598896551724138e-05, "loss": 0.314, "step": 3867 }, { "epoch": 13.337931034482759, "grad_norm": 1.191833734512329, "learning_rate": 1.5993103448275863e-05, "loss": 0.314, "step": 3868 }, { "epoch": 13.341379310344827, "grad_norm": 0.9038443565368652, "learning_rate": 1.5997241379310348e-05, "loss": 0.3379, "step": 3869 }, { "epoch": 13.344827586206897, "grad_norm": 9.767195701599121, "learning_rate": 1.6001379310344827e-05, "loss": 0.4173, "step": 3870 }, { "epoch": 13.348275862068965, "grad_norm": 0.851170003414154, "learning_rate": 1.6005517241379312e-05, "loss": 0.3442, "step": 3871 }, { "epoch": 13.351724137931035, "grad_norm": 0.8162170052528381, "learning_rate": 1.6009655172413794e-05, "loss": 0.3545, "step": 3872 }, { "epoch": 13.355172413793104, "grad_norm": 0.6850621700286865, "learning_rate": 1.6013793103448276e-05, "loss": 0.3445, "step": 3873 }, { "epoch": 13.358620689655172, "grad_norm": 0.8520984053611755, "learning_rate": 1.6017931034482758e-05, "loss": 0.3146, "step": 3874 }, { "epoch": 13.362068965517242, "grad_norm": 0.7056204080581665, "learning_rate": 1.602206896551724e-05, "loss": 0.3433, "step": 3875 }, { "epoch": 13.36551724137931, "grad_norm": 0.7750594615936279, "learning_rate": 1.6026206896551725e-05, "loss": 0.3051, "step": 3876 }, { "epoch": 13.36896551724138, "grad_norm": 0.9734617471694946, "learning_rate": 1.6030344827586208e-05, "loss": 0.323, "step": 3877 }, { "epoch": 13.372413793103448, "grad_norm": 1.0388004779815674, "learning_rate": 1.603448275862069e-05, "loss": 0.3057, "step": 3878 }, { "epoch": 13.375862068965517, "grad_norm": 0.7847442030906677, "learning_rate": 1.603862068965517e-05, "loss": 0.3214, "step": 3879 }, { "epoch": 13.379310344827585, "grad_norm": 0.8838671445846558, "learning_rate": 1.6042758620689657e-05, "loss": 0.3103, "step": 3880 }, { "epoch": 13.382758620689655, "grad_norm": 1.0579066276550293, "learning_rate": 1.604689655172414e-05, "loss": 0.3407, "step": 3881 }, { "epoch": 13.386206896551725, "grad_norm": 1.1953500509262085, "learning_rate": 1.605103448275862e-05, "loss": 0.3064, "step": 3882 }, { "epoch": 13.389655172413793, "grad_norm": 0.7439384460449219, "learning_rate": 1.6055172413793103e-05, "loss": 0.2894, "step": 3883 }, { "epoch": 13.393103448275863, "grad_norm": 0.9325054883956909, "learning_rate": 1.605931034482759e-05, "loss": 0.313, "step": 3884 }, { "epoch": 13.39655172413793, "grad_norm": 1.31337308883667, "learning_rate": 1.606344827586207e-05, "loss": 0.3283, "step": 3885 }, { "epoch": 13.4, "grad_norm": 1.0101287364959717, "learning_rate": 1.606758620689655e-05, "loss": 0.3177, "step": 3886 }, { "epoch": 13.403448275862068, "grad_norm": 0.7739372849464417, "learning_rate": 1.6071724137931034e-05, "loss": 0.3101, "step": 3887 }, { "epoch": 13.406896551724138, "grad_norm": 1.3148472309112549, "learning_rate": 1.6075862068965516e-05, "loss": 0.3378, "step": 3888 }, { "epoch": 13.410344827586206, "grad_norm": 1.3758227825164795, "learning_rate": 1.6080000000000002e-05, "loss": 0.3084, "step": 3889 }, { "epoch": 13.413793103448276, "grad_norm": 2.3317713737487793, "learning_rate": 1.608413793103448e-05, "loss": 0.3129, "step": 3890 }, { "epoch": 13.417241379310346, "grad_norm": 1.2671279907226562, "learning_rate": 1.6088275862068966e-05, "loss": 0.3421, "step": 3891 }, { "epoch": 13.420689655172414, "grad_norm": 1.1761630773544312, "learning_rate": 1.6092413793103448e-05, "loss": 0.3074, "step": 3892 }, { "epoch": 13.424137931034483, "grad_norm": 1.194441795349121, "learning_rate": 1.6096551724137933e-05, "loss": 0.3272, "step": 3893 }, { "epoch": 13.427586206896551, "grad_norm": 1.7417285442352295, "learning_rate": 1.6100689655172412e-05, "loss": 0.401, "step": 3894 }, { "epoch": 13.431034482758621, "grad_norm": 2.420424699783325, "learning_rate": 1.6104827586206897e-05, "loss": 0.5353, "step": 3895 }, { "epoch": 13.434482758620689, "grad_norm": 0.8034090399742126, "learning_rate": 1.610896551724138e-05, "loss": 0.3497, "step": 3896 }, { "epoch": 13.437931034482759, "grad_norm": 0.6744460463523865, "learning_rate": 1.6113103448275865e-05, "loss": 0.3095, "step": 3897 }, { "epoch": 13.441379310344828, "grad_norm": 0.8056693077087402, "learning_rate": 1.6117241379310347e-05, "loss": 0.3225, "step": 3898 }, { "epoch": 13.444827586206896, "grad_norm": 0.87493497133255, "learning_rate": 1.6121379310344825e-05, "loss": 0.3055, "step": 3899 }, { "epoch": 13.448275862068966, "grad_norm": 0.8489865660667419, "learning_rate": 1.612551724137931e-05, "loss": 0.3037, "step": 3900 }, { "epoch": 13.451724137931034, "grad_norm": 0.6477885842323303, "learning_rate": 1.6129655172413793e-05, "loss": 0.3024, "step": 3901 }, { "epoch": 13.455172413793104, "grad_norm": 0.8185053467750549, "learning_rate": 1.6133793103448278e-05, "loss": 0.3555, "step": 3902 }, { "epoch": 13.458620689655172, "grad_norm": 0.6204527020454407, "learning_rate": 1.6137931034482757e-05, "loss": 0.3262, "step": 3903 }, { "epoch": 13.462068965517242, "grad_norm": 0.7475156784057617, "learning_rate": 1.6142068965517242e-05, "loss": 0.3051, "step": 3904 }, { "epoch": 13.46551724137931, "grad_norm": 0.9663641452789307, "learning_rate": 1.6146206896551724e-05, "loss": 0.3215, "step": 3905 }, { "epoch": 13.46896551724138, "grad_norm": 0.8489738702774048, "learning_rate": 1.615034482758621e-05, "loss": 0.2858, "step": 3906 }, { "epoch": 13.472413793103449, "grad_norm": 0.6659765839576721, "learning_rate": 1.615448275862069e-05, "loss": 0.2985, "step": 3907 }, { "epoch": 13.475862068965517, "grad_norm": 0.7658395767211914, "learning_rate": 1.6158620689655174e-05, "loss": 0.3269, "step": 3908 }, { "epoch": 13.479310344827587, "grad_norm": 0.8190159201622009, "learning_rate": 1.6162758620689656e-05, "loss": 0.3061, "step": 3909 }, { "epoch": 13.482758620689655, "grad_norm": 1.386561393737793, "learning_rate": 1.616689655172414e-05, "loss": 0.3064, "step": 3910 }, { "epoch": 13.486206896551725, "grad_norm": 1.3715462684631348, "learning_rate": 1.617103448275862e-05, "loss": 0.3272, "step": 3911 }, { "epoch": 13.489655172413793, "grad_norm": 1.0486552715301514, "learning_rate": 1.6175172413793102e-05, "loss": 0.3014, "step": 3912 }, { "epoch": 13.493103448275862, "grad_norm": 1.2728582620620728, "learning_rate": 1.6179310344827587e-05, "loss": 0.3049, "step": 3913 }, { "epoch": 13.49655172413793, "grad_norm": 0.9696835875511169, "learning_rate": 1.618344827586207e-05, "loss": 0.3181, "step": 3914 }, { "epoch": 13.5, "grad_norm": 1.1824218034744263, "learning_rate": 1.618758620689655e-05, "loss": 0.3278, "step": 3915 }, { "epoch": 13.50344827586207, "grad_norm": 1.1745522022247314, "learning_rate": 1.6191724137931033e-05, "loss": 0.3477, "step": 3916 }, { "epoch": 13.506896551724138, "grad_norm": 1.2596272230148315, "learning_rate": 1.619586206896552e-05, "loss": 0.3234, "step": 3917 }, { "epoch": 13.510344827586207, "grad_norm": 2.511472463607788, "learning_rate": 1.62e-05, "loss": 0.2983, "step": 3918 }, { "epoch": 13.513793103448275, "grad_norm": 2.572240114212036, "learning_rate": 1.6204137931034483e-05, "loss": 0.3873, "step": 3919 }, { "epoch": 13.517241379310345, "grad_norm": 2.32352352142334, "learning_rate": 1.6208275862068965e-05, "loss": 0.4338, "step": 3920 }, { "epoch": 13.520689655172413, "grad_norm": 0.9517437815666199, "learning_rate": 1.621241379310345e-05, "loss": 0.4155, "step": 3921 }, { "epoch": 13.524137931034483, "grad_norm": 1.297082543373108, "learning_rate": 1.6216551724137932e-05, "loss": 0.3615, "step": 3922 }, { "epoch": 13.527586206896551, "grad_norm": 1.012150764465332, "learning_rate": 1.6220689655172414e-05, "loss": 0.3053, "step": 3923 }, { "epoch": 13.53103448275862, "grad_norm": 0.6447804570198059, "learning_rate": 1.6224827586206896e-05, "loss": 0.2963, "step": 3924 }, { "epoch": 13.53448275862069, "grad_norm": 1.8127444982528687, "learning_rate": 1.6228965517241378e-05, "loss": 0.3497, "step": 3925 }, { "epoch": 13.537931034482758, "grad_norm": 0.8113629221916199, "learning_rate": 1.6233103448275864e-05, "loss": 0.3429, "step": 3926 }, { "epoch": 13.541379310344828, "grad_norm": 0.8755810260772705, "learning_rate": 1.6237241379310342e-05, "loss": 0.3024, "step": 3927 }, { "epoch": 13.544827586206896, "grad_norm": 1.6134884357452393, "learning_rate": 1.6241379310344828e-05, "loss": 0.294, "step": 3928 }, { "epoch": 13.548275862068966, "grad_norm": 0.7308496832847595, "learning_rate": 1.624551724137931e-05, "loss": 0.3297, "step": 3929 }, { "epoch": 13.551724137931034, "grad_norm": 0.8594422936439514, "learning_rate": 1.6249655172413795e-05, "loss": 0.3154, "step": 3930 }, { "epoch": 13.555172413793104, "grad_norm": 1.0082261562347412, "learning_rate": 1.6253793103448277e-05, "loss": 0.2942, "step": 3931 }, { "epoch": 13.558620689655172, "grad_norm": 0.7459651231765747, "learning_rate": 1.625793103448276e-05, "loss": 0.339, "step": 3932 }, { "epoch": 13.562068965517241, "grad_norm": 1.0104563236236572, "learning_rate": 1.626206896551724e-05, "loss": 0.2848, "step": 3933 }, { "epoch": 13.565517241379311, "grad_norm": 1.0495679378509521, "learning_rate": 1.6266206896551727e-05, "loss": 0.3233, "step": 3934 }, { "epoch": 13.568965517241379, "grad_norm": 1.3528505563735962, "learning_rate": 1.627034482758621e-05, "loss": 0.3201, "step": 3935 }, { "epoch": 13.572413793103449, "grad_norm": 0.7814039587974548, "learning_rate": 1.627448275862069e-05, "loss": 0.2817, "step": 3936 }, { "epoch": 13.575862068965517, "grad_norm": 1.1241358518600464, "learning_rate": 1.6278620689655173e-05, "loss": 0.3261, "step": 3937 }, { "epoch": 13.579310344827586, "grad_norm": 0.9397447109222412, "learning_rate": 1.6282758620689655e-05, "loss": 0.32, "step": 3938 }, { "epoch": 13.582758620689654, "grad_norm": 1.2300153970718384, "learning_rate": 1.628689655172414e-05, "loss": 0.2799, "step": 3939 }, { "epoch": 13.586206896551724, "grad_norm": 0.8567183613777161, "learning_rate": 1.629103448275862e-05, "loss": 0.2996, "step": 3940 }, { "epoch": 13.589655172413792, "grad_norm": 1.142048716545105, "learning_rate": 1.6295172413793104e-05, "loss": 0.3356, "step": 3941 }, { "epoch": 13.593103448275862, "grad_norm": 1.1243555545806885, "learning_rate": 1.6299310344827586e-05, "loss": 0.3049, "step": 3942 }, { "epoch": 13.596551724137932, "grad_norm": 1.3337563276290894, "learning_rate": 1.630344827586207e-05, "loss": 0.3812, "step": 3943 }, { "epoch": 13.6, "grad_norm": 1.363194465637207, "learning_rate": 1.630758620689655e-05, "loss": 0.3247, "step": 3944 }, { "epoch": 13.60344827586207, "grad_norm": 1.8258910179138184, "learning_rate": 1.6311724137931036e-05, "loss": 0.4433, "step": 3945 }, { "epoch": 13.606896551724137, "grad_norm": 0.7324004769325256, "learning_rate": 1.6315862068965518e-05, "loss": 0.349, "step": 3946 }, { "epoch": 13.610344827586207, "grad_norm": 0.8726509213447571, "learning_rate": 1.6320000000000003e-05, "loss": 0.3684, "step": 3947 }, { "epoch": 13.613793103448275, "grad_norm": 0.6111096143722534, "learning_rate": 1.632413793103448e-05, "loss": 0.3085, "step": 3948 }, { "epoch": 13.617241379310345, "grad_norm": 0.7267473340034485, "learning_rate": 1.6328275862068967e-05, "loss": 0.3265, "step": 3949 }, { "epoch": 13.620689655172415, "grad_norm": 1.6655889749526978, "learning_rate": 1.633241379310345e-05, "loss": 0.3092, "step": 3950 }, { "epoch": 13.624137931034483, "grad_norm": 0.862179160118103, "learning_rate": 1.633655172413793e-05, "loss": 0.3293, "step": 3951 }, { "epoch": 13.627586206896552, "grad_norm": 1.2269164323806763, "learning_rate": 1.6340689655172413e-05, "loss": 0.3167, "step": 3952 }, { "epoch": 13.63103448275862, "grad_norm": 0.938220202922821, "learning_rate": 1.6344827586206895e-05, "loss": 0.3342, "step": 3953 }, { "epoch": 13.63448275862069, "grad_norm": 0.6839984655380249, "learning_rate": 1.634896551724138e-05, "loss": 0.2694, "step": 3954 }, { "epoch": 13.637931034482758, "grad_norm": 1.0894001722335815, "learning_rate": 1.6353103448275863e-05, "loss": 0.3097, "step": 3955 }, { "epoch": 13.641379310344828, "grad_norm": 1.057110071182251, "learning_rate": 1.6357241379310345e-05, "loss": 0.3121, "step": 3956 }, { "epoch": 13.644827586206896, "grad_norm": 0.9424734711647034, "learning_rate": 1.6361379310344827e-05, "loss": 0.3128, "step": 3957 }, { "epoch": 13.648275862068965, "grad_norm": 0.7607743144035339, "learning_rate": 1.6365517241379312e-05, "loss": 0.2995, "step": 3958 }, { "epoch": 13.651724137931035, "grad_norm": 1.183838129043579, "learning_rate": 1.6369655172413794e-05, "loss": 0.3068, "step": 3959 }, { "epoch": 13.655172413793103, "grad_norm": 1.058556318283081, "learning_rate": 1.6373793103448276e-05, "loss": 0.3007, "step": 3960 }, { "epoch": 13.658620689655173, "grad_norm": 1.7066229581832886, "learning_rate": 1.6377931034482758e-05, "loss": 0.3119, "step": 3961 }, { "epoch": 13.662068965517241, "grad_norm": 1.0775262117385864, "learning_rate": 1.6382068965517243e-05, "loss": 0.2845, "step": 3962 }, { "epoch": 13.66551724137931, "grad_norm": 1.4826245307922363, "learning_rate": 1.6386206896551725e-05, "loss": 0.2803, "step": 3963 }, { "epoch": 13.668965517241379, "grad_norm": 1.3921916484832764, "learning_rate": 1.6390344827586207e-05, "loss": 0.3468, "step": 3964 }, { "epoch": 13.672413793103448, "grad_norm": 1.0432838201522827, "learning_rate": 1.639448275862069e-05, "loss": 0.2926, "step": 3965 }, { "epoch": 13.675862068965518, "grad_norm": 0.9637266397476196, "learning_rate": 1.639862068965517e-05, "loss": 0.3313, "step": 3966 }, { "epoch": 13.679310344827586, "grad_norm": 0.9723725914955139, "learning_rate": 1.6402758620689657e-05, "loss": 0.3261, "step": 3967 }, { "epoch": 13.682758620689656, "grad_norm": 1.5967835187911987, "learning_rate": 1.640689655172414e-05, "loss": 0.3455, "step": 3968 }, { "epoch": 13.686206896551724, "grad_norm": 1.54098379611969, "learning_rate": 1.641103448275862e-05, "loss": 0.3998, "step": 3969 }, { "epoch": 13.689655172413794, "grad_norm": 1.6348092555999756, "learning_rate": 1.6415172413793103e-05, "loss": 0.409, "step": 3970 }, { "epoch": 13.693103448275862, "grad_norm": 1.30031156539917, "learning_rate": 1.641931034482759e-05, "loss": 0.4022, "step": 3971 }, { "epoch": 13.696551724137931, "grad_norm": 0.7689396739006042, "learning_rate": 1.642344827586207e-05, "loss": 0.3322, "step": 3972 }, { "epoch": 13.7, "grad_norm": 0.9652667045593262, "learning_rate": 1.6427586206896552e-05, "loss": 0.3482, "step": 3973 }, { "epoch": 13.703448275862069, "grad_norm": 1.1144545078277588, "learning_rate": 1.6431724137931034e-05, "loss": 0.3342, "step": 3974 }, { "epoch": 13.706896551724139, "grad_norm": 0.9094724655151367, "learning_rate": 1.643586206896552e-05, "loss": 0.3235, "step": 3975 }, { "epoch": 13.710344827586207, "grad_norm": 0.7061663866043091, "learning_rate": 1.6440000000000002e-05, "loss": 0.2897, "step": 3976 }, { "epoch": 13.713793103448277, "grad_norm": 0.9347353577613831, "learning_rate": 1.644413793103448e-05, "loss": 0.3514, "step": 3977 }, { "epoch": 13.717241379310344, "grad_norm": 0.6418756246566772, "learning_rate": 1.6448275862068966e-05, "loss": 0.2682, "step": 3978 }, { "epoch": 13.720689655172414, "grad_norm": 1.0162315368652344, "learning_rate": 1.6452413793103448e-05, "loss": 0.2942, "step": 3979 }, { "epoch": 13.724137931034482, "grad_norm": 0.9851358532905579, "learning_rate": 1.6456551724137933e-05, "loss": 0.3117, "step": 3980 }, { "epoch": 13.727586206896552, "grad_norm": 1.3320353031158447, "learning_rate": 1.6460689655172412e-05, "loss": 0.2833, "step": 3981 }, { "epoch": 13.73103448275862, "grad_norm": 0.8743016123771667, "learning_rate": 1.6464827586206897e-05, "loss": 0.3655, "step": 3982 }, { "epoch": 13.73448275862069, "grad_norm": 1.0603967905044556, "learning_rate": 1.646896551724138e-05, "loss": 0.2939, "step": 3983 }, { "epoch": 13.73793103448276, "grad_norm": 0.9030147790908813, "learning_rate": 1.6473103448275865e-05, "loss": 0.2971, "step": 3984 }, { "epoch": 13.741379310344827, "grad_norm": 1.6104263067245483, "learning_rate": 1.6477241379310343e-05, "loss": 0.3413, "step": 3985 }, { "epoch": 13.744827586206897, "grad_norm": 1.079408049583435, "learning_rate": 1.648137931034483e-05, "loss": 0.3141, "step": 3986 }, { "epoch": 13.748275862068965, "grad_norm": 1.1289973258972168, "learning_rate": 1.648551724137931e-05, "loss": 0.2939, "step": 3987 }, { "epoch": 13.751724137931035, "grad_norm": 1.814481258392334, "learning_rate": 1.6489655172413793e-05, "loss": 0.3312, "step": 3988 }, { "epoch": 13.755172413793103, "grad_norm": 1.592763066291809, "learning_rate": 1.6493793103448275e-05, "loss": 0.3558, "step": 3989 }, { "epoch": 13.758620689655173, "grad_norm": 1.106598138809204, "learning_rate": 1.6497931034482757e-05, "loss": 0.3254, "step": 3990 }, { "epoch": 13.76206896551724, "grad_norm": 2.8329169750213623, "learning_rate": 1.6502068965517242e-05, "loss": 0.3529, "step": 3991 }, { "epoch": 13.76551724137931, "grad_norm": 1.356931447982788, "learning_rate": 1.6506206896551724e-05, "loss": 0.3111, "step": 3992 }, { "epoch": 13.76896551724138, "grad_norm": 1.3073530197143555, "learning_rate": 1.6510344827586206e-05, "loss": 0.3357, "step": 3993 }, { "epoch": 13.772413793103448, "grad_norm": 1.6070795059204102, "learning_rate": 1.651448275862069e-05, "loss": 0.3552, "step": 3994 }, { "epoch": 13.775862068965518, "grad_norm": 1.9218428134918213, "learning_rate": 1.6518620689655174e-05, "loss": 0.5018, "step": 3995 }, { "epoch": 13.779310344827586, "grad_norm": 0.6326826810836792, "learning_rate": 1.6522758620689656e-05, "loss": 0.3273, "step": 3996 }, { "epoch": 13.782758620689656, "grad_norm": 1.0610485076904297, "learning_rate": 1.652689655172414e-05, "loss": 0.341, "step": 3997 }, { "epoch": 13.786206896551723, "grad_norm": 0.9667916893959045, "learning_rate": 1.653103448275862e-05, "loss": 0.3698, "step": 3998 }, { "epoch": 13.789655172413793, "grad_norm": 2.0316007137298584, "learning_rate": 1.6535172413793105e-05, "loss": 0.3394, "step": 3999 }, { "epoch": 13.793103448275861, "grad_norm": 0.9057490825653076, "learning_rate": 1.6539310344827587e-05, "loss": 0.305, "step": 4000 }, { "epoch": 13.793103448275861, "eval_cer": 0.1392303925358901, "eval_loss": 0.3367466330528259, "eval_runtime": 19.0564, "eval_samples_per_second": 48.645, "eval_steps_per_second": 0.157, "eval_wer": 0.3356625258799172, "step": 4000 }, { "epoch": 13.796551724137931, "grad_norm": 0.8310452699661255, "learning_rate": 1.654344827586207e-05, "loss": 0.328, "step": 4001 }, { "epoch": 13.8, "grad_norm": 0.7370907664299011, "learning_rate": 1.654758620689655e-05, "loss": 0.3045, "step": 4002 }, { "epoch": 13.803448275862069, "grad_norm": 0.765990138053894, "learning_rate": 1.6551724137931033e-05, "loss": 0.3155, "step": 4003 }, { "epoch": 13.806896551724138, "grad_norm": 0.8331217765808105, "learning_rate": 1.655586206896552e-05, "loss": 0.3328, "step": 4004 }, { "epoch": 13.810344827586206, "grad_norm": 0.7738590240478516, "learning_rate": 1.656e-05, "loss": 0.2936, "step": 4005 }, { "epoch": 13.813793103448276, "grad_norm": 0.9100485444068909, "learning_rate": 1.6564137931034483e-05, "loss": 0.353, "step": 4006 }, { "epoch": 13.817241379310344, "grad_norm": 0.9965589046478271, "learning_rate": 1.6568275862068965e-05, "loss": 0.3105, "step": 4007 }, { "epoch": 13.820689655172414, "grad_norm": 0.8979689478874207, "learning_rate": 1.657241379310345e-05, "loss": 0.3021, "step": 4008 }, { "epoch": 13.824137931034482, "grad_norm": 0.8958078026771545, "learning_rate": 1.6576551724137932e-05, "loss": 0.3203, "step": 4009 }, { "epoch": 13.827586206896552, "grad_norm": 1.9795936346054077, "learning_rate": 1.6580689655172414e-05, "loss": 0.3474, "step": 4010 }, { "epoch": 13.831034482758621, "grad_norm": 0.7928299903869629, "learning_rate": 1.6584827586206896e-05, "loss": 0.2839, "step": 4011 }, { "epoch": 13.83448275862069, "grad_norm": 1.1124587059020996, "learning_rate": 1.658896551724138e-05, "loss": 0.3024, "step": 4012 }, { "epoch": 13.837931034482759, "grad_norm": 0.9605265855789185, "learning_rate": 1.6593103448275864e-05, "loss": 0.3025, "step": 4013 }, { "epoch": 13.841379310344827, "grad_norm": 1.2697995901107788, "learning_rate": 1.6597241379310342e-05, "loss": 0.2904, "step": 4014 }, { "epoch": 13.844827586206897, "grad_norm": 1.2774806022644043, "learning_rate": 1.6601379310344828e-05, "loss": 0.3052, "step": 4015 }, { "epoch": 13.848275862068965, "grad_norm": 1.4354305267333984, "learning_rate": 1.660551724137931e-05, "loss": 0.3566, "step": 4016 }, { "epoch": 13.851724137931035, "grad_norm": 1.2270746231079102, "learning_rate": 1.6609655172413795e-05, "loss": 0.2986, "step": 4017 }, { "epoch": 13.855172413793104, "grad_norm": 1.9950950145721436, "learning_rate": 1.6613793103448274e-05, "loss": 0.3254, "step": 4018 }, { "epoch": 13.858620689655172, "grad_norm": 1.3066366910934448, "learning_rate": 1.661793103448276e-05, "loss": 0.3692, "step": 4019 }, { "epoch": 13.862068965517242, "grad_norm": 1.917973518371582, "learning_rate": 1.662206896551724e-05, "loss": 0.4367, "step": 4020 }, { "epoch": 13.86551724137931, "grad_norm": 0.7863385081291199, "learning_rate": 1.6626206896551727e-05, "loss": 0.3803, "step": 4021 }, { "epoch": 13.86896551724138, "grad_norm": 0.6514551639556885, "learning_rate": 1.6630344827586205e-05, "loss": 0.3022, "step": 4022 }, { "epoch": 13.872413793103448, "grad_norm": 0.9944365620613098, "learning_rate": 1.663448275862069e-05, "loss": 0.3425, "step": 4023 }, { "epoch": 13.875862068965517, "grad_norm": 0.796743631362915, "learning_rate": 1.6638620689655173e-05, "loss": 0.3352, "step": 4024 }, { "epoch": 13.879310344827585, "grad_norm": 0.6671460866928101, "learning_rate": 1.6642758620689658e-05, "loss": 0.2969, "step": 4025 }, { "epoch": 13.882758620689655, "grad_norm": 0.7031434178352356, "learning_rate": 1.6646896551724137e-05, "loss": 0.3113, "step": 4026 }, { "epoch": 13.886206896551725, "grad_norm": 1.451688289642334, "learning_rate": 1.665103448275862e-05, "loss": 0.32, "step": 4027 }, { "epoch": 13.889655172413793, "grad_norm": 0.8854536414146423, "learning_rate": 1.6655172413793104e-05, "loss": 0.3163, "step": 4028 }, { "epoch": 13.893103448275863, "grad_norm": 1.0396337509155273, "learning_rate": 1.6659310344827586e-05, "loss": 0.3081, "step": 4029 }, { "epoch": 13.89655172413793, "grad_norm": 2.0046184062957764, "learning_rate": 1.666344827586207e-05, "loss": 0.2983, "step": 4030 }, { "epoch": 13.9, "grad_norm": 1.0110043287277222, "learning_rate": 1.666758620689655e-05, "loss": 0.306, "step": 4031 }, { "epoch": 13.903448275862068, "grad_norm": 1.3513706922531128, "learning_rate": 1.6671724137931036e-05, "loss": 0.3405, "step": 4032 }, { "epoch": 13.906896551724138, "grad_norm": 0.8042402863502502, "learning_rate": 1.6675862068965518e-05, "loss": 0.3242, "step": 4033 }, { "epoch": 13.910344827586208, "grad_norm": 0.8182509541511536, "learning_rate": 1.6680000000000003e-05, "loss": 0.3045, "step": 4034 }, { "epoch": 13.913793103448276, "grad_norm": 0.9360842108726501, "learning_rate": 1.668413793103448e-05, "loss": 0.3259, "step": 4035 }, { "epoch": 13.917241379310346, "grad_norm": 0.7183888554573059, "learning_rate": 1.6688275862068967e-05, "loss": 0.289, "step": 4036 }, { "epoch": 13.920689655172414, "grad_norm": 1.1916860342025757, "learning_rate": 1.669241379310345e-05, "loss": 0.3242, "step": 4037 }, { "epoch": 13.924137931034483, "grad_norm": 1.0486013889312744, "learning_rate": 1.6696551724137934e-05, "loss": 0.2972, "step": 4038 }, { "epoch": 13.927586206896551, "grad_norm": 1.1006993055343628, "learning_rate": 1.6700689655172413e-05, "loss": 0.2783, "step": 4039 }, { "epoch": 13.931034482758621, "grad_norm": 1.0394728183746338, "learning_rate": 1.6704827586206895e-05, "loss": 0.33, "step": 4040 }, { "epoch": 13.934482758620689, "grad_norm": 1.2603517770767212, "learning_rate": 1.670896551724138e-05, "loss": 0.3343, "step": 4041 }, { "epoch": 13.937931034482759, "grad_norm": 0.9034843444824219, "learning_rate": 1.6713103448275862e-05, "loss": 0.33, "step": 4042 }, { "epoch": 13.941379310344828, "grad_norm": 1.0194214582443237, "learning_rate": 1.6717241379310344e-05, "loss": 0.3102, "step": 4043 }, { "epoch": 13.944827586206896, "grad_norm": 1.4171812534332275, "learning_rate": 1.6721379310344826e-05, "loss": 0.353, "step": 4044 }, { "epoch": 13.948275862068966, "grad_norm": 1.76985502243042, "learning_rate": 1.6725517241379312e-05, "loss": 0.4708, "step": 4045 }, { "epoch": 13.951724137931034, "grad_norm": 0.9054811596870422, "learning_rate": 1.6729655172413794e-05, "loss": 0.3754, "step": 4046 }, { "epoch": 13.955172413793104, "grad_norm": 0.7820034027099609, "learning_rate": 1.6733793103448276e-05, "loss": 0.3432, "step": 4047 }, { "epoch": 13.958620689655172, "grad_norm": 0.7954934239387512, "learning_rate": 1.6737931034482758e-05, "loss": 0.3402, "step": 4048 }, { "epoch": 13.962068965517242, "grad_norm": 0.9437413811683655, "learning_rate": 1.6742068965517243e-05, "loss": 0.3421, "step": 4049 }, { "epoch": 13.96551724137931, "grad_norm": 0.9090454578399658, "learning_rate": 1.6746206896551725e-05, "loss": 0.3044, "step": 4050 }, { "epoch": 13.96896551724138, "grad_norm": 0.7008870840072632, "learning_rate": 1.6750344827586207e-05, "loss": 0.3032, "step": 4051 }, { "epoch": 13.972413793103449, "grad_norm": 0.9286815524101257, "learning_rate": 1.675448275862069e-05, "loss": 0.2831, "step": 4052 }, { "epoch": 13.975862068965517, "grad_norm": 0.8409700989723206, "learning_rate": 1.675862068965517e-05, "loss": 0.3301, "step": 4053 }, { "epoch": 13.979310344827587, "grad_norm": 0.6830921173095703, "learning_rate": 1.6762758620689657e-05, "loss": 0.3144, "step": 4054 }, { "epoch": 13.982758620689655, "grad_norm": 1.0106072425842285, "learning_rate": 1.6766896551724135e-05, "loss": 0.3248, "step": 4055 }, { "epoch": 13.986206896551725, "grad_norm": 0.9730004072189331, "learning_rate": 1.677103448275862e-05, "loss": 0.3386, "step": 4056 }, { "epoch": 13.989655172413793, "grad_norm": 0.9547684192657471, "learning_rate": 1.6775172413793103e-05, "loss": 0.3224, "step": 4057 }, { "epoch": 13.993103448275862, "grad_norm": 0.9756284952163696, "learning_rate": 1.6779310344827588e-05, "loss": 0.281, "step": 4058 }, { "epoch": 13.99655172413793, "grad_norm": 1.286540150642395, "learning_rate": 1.6783448275862067e-05, "loss": 0.3595, "step": 4059 }, { "epoch": 14.0, "grad_norm": 1.7017902135849, "learning_rate": 1.6787586206896552e-05, "loss": 0.4668, "step": 4060 }, { "epoch": 14.00344827586207, "grad_norm": 0.8973599076271057, "learning_rate": 1.6791724137931034e-05, "loss": 0.4203, "step": 4061 }, { "epoch": 14.006896551724138, "grad_norm": 0.8756121397018433, "learning_rate": 1.679586206896552e-05, "loss": 0.3278, "step": 4062 }, { "epoch": 14.010344827586207, "grad_norm": 0.9426754713058472, "learning_rate": 1.6800000000000002e-05, "loss": 0.3335, "step": 4063 }, { "epoch": 14.013793103448275, "grad_norm": 0.7915513515472412, "learning_rate": 1.6804137931034484e-05, "loss": 0.2966, "step": 4064 }, { "epoch": 14.017241379310345, "grad_norm": 1.057394027709961, "learning_rate": 1.6808275862068966e-05, "loss": 0.3257, "step": 4065 }, { "epoch": 14.020689655172413, "grad_norm": 0.6377366185188293, "learning_rate": 1.6812413793103448e-05, "loss": 0.295, "step": 4066 }, { "epoch": 14.024137931034483, "grad_norm": 0.8745348453521729, "learning_rate": 1.6816551724137933e-05, "loss": 0.3215, "step": 4067 }, { "epoch": 14.027586206896551, "grad_norm": 0.8430737257003784, "learning_rate": 1.6820689655172412e-05, "loss": 0.3092, "step": 4068 }, { "epoch": 14.03103448275862, "grad_norm": 1.1393035650253296, "learning_rate": 1.6824827586206897e-05, "loss": 0.323, "step": 4069 }, { "epoch": 14.03448275862069, "grad_norm": 1.2054030895233154, "learning_rate": 1.682896551724138e-05, "loss": 0.2945, "step": 4070 }, { "epoch": 14.037931034482758, "grad_norm": 0.8736142516136169, "learning_rate": 1.6833103448275865e-05, "loss": 0.2679, "step": 4071 }, { "epoch": 14.041379310344828, "grad_norm": 0.7867350578308105, "learning_rate": 1.6837241379310343e-05, "loss": 0.3431, "step": 4072 }, { "epoch": 14.044827586206896, "grad_norm": 0.7596640586853027, "learning_rate": 1.684137931034483e-05, "loss": 0.3012, "step": 4073 }, { "epoch": 14.048275862068966, "grad_norm": 0.7210896015167236, "learning_rate": 1.684551724137931e-05, "loss": 0.3092, "step": 4074 }, { "epoch": 14.051724137931034, "grad_norm": 1.4068528413772583, "learning_rate": 1.6849655172413796e-05, "loss": 0.2982, "step": 4075 }, { "epoch": 14.055172413793104, "grad_norm": 0.7773165106773376, "learning_rate": 1.6853793103448275e-05, "loss": 0.3024, "step": 4076 }, { "epoch": 14.058620689655172, "grad_norm": 0.9569291472434998, "learning_rate": 1.685793103448276e-05, "loss": 0.3093, "step": 4077 }, { "epoch": 14.062068965517241, "grad_norm": 1.4614214897155762, "learning_rate": 1.6862068965517242e-05, "loss": 0.281, "step": 4078 }, { "epoch": 14.065517241379311, "grad_norm": 0.8411420583724976, "learning_rate": 1.6866206896551724e-05, "loss": 0.2981, "step": 4079 }, { "epoch": 14.068965517241379, "grad_norm": 1.4182322025299072, "learning_rate": 1.6870344827586206e-05, "loss": 0.3114, "step": 4080 }, { "epoch": 14.072413793103449, "grad_norm": 1.1786797046661377, "learning_rate": 1.6874482758620688e-05, "loss": 0.3658, "step": 4081 }, { "epoch": 14.075862068965517, "grad_norm": 0.9557023048400879, "learning_rate": 1.6878620689655174e-05, "loss": 0.2848, "step": 4082 }, { "epoch": 14.079310344827586, "grad_norm": 1.841140866279602, "learning_rate": 1.6882758620689656e-05, "loss": 0.312, "step": 4083 }, { "epoch": 14.082758620689654, "grad_norm": 1.8769611120224, "learning_rate": 1.6886896551724138e-05, "loss": 0.33, "step": 4084 }, { "epoch": 14.086206896551724, "grad_norm": 1.4786890745162964, "learning_rate": 1.689103448275862e-05, "loss": 0.4504, "step": 4085 }, { "epoch": 14.089655172413794, "grad_norm": 0.8584338426589966, "learning_rate": 1.6895172413793105e-05, "loss": 0.3495, "step": 4086 }, { "epoch": 14.093103448275862, "grad_norm": 0.5867743492126465, "learning_rate": 1.6899310344827587e-05, "loss": 0.3062, "step": 4087 }, { "epoch": 14.096551724137932, "grad_norm": 0.7482850551605225, "learning_rate": 1.690344827586207e-05, "loss": 0.3106, "step": 4088 }, { "epoch": 14.1, "grad_norm": 0.8547123670578003, "learning_rate": 1.690758620689655e-05, "loss": 0.3241, "step": 4089 }, { "epoch": 14.10344827586207, "grad_norm": 0.9668439626693726, "learning_rate": 1.6911724137931037e-05, "loss": 0.3275, "step": 4090 }, { "epoch": 14.106896551724137, "grad_norm": 0.8815339803695679, "learning_rate": 1.691586206896552e-05, "loss": 0.2951, "step": 4091 }, { "epoch": 14.110344827586207, "grad_norm": 0.7625361084938049, "learning_rate": 1.6919999999999997e-05, "loss": 0.3102, "step": 4092 }, { "epoch": 14.113793103448275, "grad_norm": 0.6844339966773987, "learning_rate": 1.6924137931034483e-05, "loss": 0.2997, "step": 4093 }, { "epoch": 14.117241379310345, "grad_norm": 1.0101327896118164, "learning_rate": 1.6928275862068965e-05, "loss": 0.3041, "step": 4094 }, { "epoch": 14.120689655172415, "grad_norm": 1.0258785486221313, "learning_rate": 1.693241379310345e-05, "loss": 0.2942, "step": 4095 }, { "epoch": 14.124137931034483, "grad_norm": 0.8295995593070984, "learning_rate": 1.6936551724137932e-05, "loss": 0.2973, "step": 4096 }, { "epoch": 14.127586206896552, "grad_norm": 1.173783779144287, "learning_rate": 1.6940689655172414e-05, "loss": 0.2875, "step": 4097 }, { "epoch": 14.13103448275862, "grad_norm": 1.0074119567871094, "learning_rate": 1.6944827586206896e-05, "loss": 0.2799, "step": 4098 }, { "epoch": 14.13448275862069, "grad_norm": 0.6996956467628479, "learning_rate": 1.694896551724138e-05, "loss": 0.2945, "step": 4099 }, { "epoch": 14.137931034482758, "grad_norm": 0.8388842344284058, "learning_rate": 1.6953103448275864e-05, "loss": 0.3083, "step": 4100 }, { "epoch": 14.141379310344828, "grad_norm": 0.7555145621299744, "learning_rate": 1.6957241379310346e-05, "loss": 0.302, "step": 4101 }, { "epoch": 14.144827586206896, "grad_norm": 0.7586984634399414, "learning_rate": 1.6961379310344828e-05, "loss": 0.3151, "step": 4102 }, { "epoch": 14.148275862068965, "grad_norm": 0.8745262026786804, "learning_rate": 1.6965517241379313e-05, "loss": 0.2793, "step": 4103 }, { "epoch": 14.151724137931035, "grad_norm": 1.3518626689910889, "learning_rate": 1.6969655172413795e-05, "loss": 0.345, "step": 4104 }, { "epoch": 14.155172413793103, "grad_norm": 0.9283069968223572, "learning_rate": 1.6973793103448274e-05, "loss": 0.3166, "step": 4105 }, { "epoch": 14.158620689655173, "grad_norm": 1.2397725582122803, "learning_rate": 1.697793103448276e-05, "loss": 0.3493, "step": 4106 }, { "epoch": 14.162068965517241, "grad_norm": 1.3023899793624878, "learning_rate": 1.698206896551724e-05, "loss": 0.3058, "step": 4107 }, { "epoch": 14.16551724137931, "grad_norm": 2.35107421875, "learning_rate": 1.6986206896551726e-05, "loss": 0.318, "step": 4108 }, { "epoch": 14.168965517241379, "grad_norm": 1.327130913734436, "learning_rate": 1.6990344827586205e-05, "loss": 0.3475, "step": 4109 }, { "epoch": 14.172413793103448, "grad_norm": 2.3049306869506836, "learning_rate": 1.699448275862069e-05, "loss": 0.4945, "step": 4110 }, { "epoch": 14.175862068965516, "grad_norm": 1.2233330011367798, "learning_rate": 1.6998620689655173e-05, "loss": 0.3752, "step": 4111 }, { "epoch": 14.179310344827586, "grad_norm": 0.6554763317108154, "learning_rate": 1.7002758620689658e-05, "loss": 0.3534, "step": 4112 }, { "epoch": 14.182758620689656, "grad_norm": 0.8183402419090271, "learning_rate": 1.7006896551724137e-05, "loss": 0.3199, "step": 4113 }, { "epoch": 14.186206896551724, "grad_norm": 0.5861454606056213, "learning_rate": 1.7011034482758622e-05, "loss": 0.2721, "step": 4114 }, { "epoch": 14.189655172413794, "grad_norm": 0.8426320552825928, "learning_rate": 1.7015172413793104e-05, "loss": 0.2781, "step": 4115 }, { "epoch": 14.193103448275862, "grad_norm": 1.3128894567489624, "learning_rate": 1.701931034482759e-05, "loss": 0.3081, "step": 4116 }, { "epoch": 14.196551724137931, "grad_norm": 0.8022948503494263, "learning_rate": 1.7023448275862068e-05, "loss": 0.2804, "step": 4117 }, { "epoch": 14.2, "grad_norm": 1.100754737854004, "learning_rate": 1.702758620689655e-05, "loss": 0.3178, "step": 4118 }, { "epoch": 14.203448275862069, "grad_norm": 0.9630993604660034, "learning_rate": 1.7031724137931035e-05, "loss": 0.2888, "step": 4119 }, { "epoch": 14.206896551724139, "grad_norm": 0.8838567733764648, "learning_rate": 1.7035862068965517e-05, "loss": 0.2771, "step": 4120 }, { "epoch": 14.210344827586207, "grad_norm": 0.8218742609024048, "learning_rate": 1.704e-05, "loss": 0.2965, "step": 4121 }, { "epoch": 14.213793103448277, "grad_norm": 0.89113849401474, "learning_rate": 1.704413793103448e-05, "loss": 0.387, "step": 4122 }, { "epoch": 14.217241379310344, "grad_norm": 0.6839450001716614, "learning_rate": 1.7048275862068967e-05, "loss": 0.3036, "step": 4123 }, { "epoch": 14.220689655172414, "grad_norm": 0.9903780817985535, "learning_rate": 1.705241379310345e-05, "loss": 0.2838, "step": 4124 }, { "epoch": 14.224137931034482, "grad_norm": 0.8893857598304749, "learning_rate": 1.705655172413793e-05, "loss": 0.3147, "step": 4125 }, { "epoch": 14.227586206896552, "grad_norm": 1.2687599658966064, "learning_rate": 1.7060689655172413e-05, "loss": 0.3182, "step": 4126 }, { "epoch": 14.23103448275862, "grad_norm": 0.9061092734336853, "learning_rate": 1.70648275862069e-05, "loss": 0.2717, "step": 4127 }, { "epoch": 14.23448275862069, "grad_norm": 1.0977020263671875, "learning_rate": 1.706896551724138e-05, "loss": 0.3027, "step": 4128 }, { "epoch": 14.23793103448276, "grad_norm": 0.9422127604484558, "learning_rate": 1.7073103448275866e-05, "loss": 0.316, "step": 4129 }, { "epoch": 14.241379310344827, "grad_norm": 0.9826316237449646, "learning_rate": 1.7077241379310344e-05, "loss": 0.3206, "step": 4130 }, { "epoch": 14.244827586206897, "grad_norm": 1.164278507232666, "learning_rate": 1.7081379310344826e-05, "loss": 0.2774, "step": 4131 }, { "epoch": 14.248275862068965, "grad_norm": 1.1615824699401855, "learning_rate": 1.7085517241379312e-05, "loss": 0.3473, "step": 4132 }, { "epoch": 14.251724137931035, "grad_norm": 0.9724471569061279, "learning_rate": 1.7089655172413794e-05, "loss": 0.3251, "step": 4133 }, { "epoch": 14.255172413793103, "grad_norm": 1.7582870721817017, "learning_rate": 1.7093793103448276e-05, "loss": 0.3446, "step": 4134 }, { "epoch": 14.258620689655173, "grad_norm": 1.9728621244430542, "learning_rate": 1.7097931034482758e-05, "loss": 0.4406, "step": 4135 }, { "epoch": 14.26206896551724, "grad_norm": 0.8662994503974915, "learning_rate": 1.7102068965517243e-05, "loss": 0.3934, "step": 4136 }, { "epoch": 14.26551724137931, "grad_norm": 0.7480033040046692, "learning_rate": 1.7106206896551725e-05, "loss": 0.3153, "step": 4137 }, { "epoch": 14.26896551724138, "grad_norm": 0.7968465685844421, "learning_rate": 1.7110344827586207e-05, "loss": 0.3623, "step": 4138 }, { "epoch": 14.272413793103448, "grad_norm": 0.9480080008506775, "learning_rate": 1.711448275862069e-05, "loss": 0.3225, "step": 4139 }, { "epoch": 14.275862068965518, "grad_norm": 0.9142926335334778, "learning_rate": 1.7118620689655175e-05, "loss": 0.2981, "step": 4140 }, { "epoch": 14.279310344827586, "grad_norm": 0.7925171852111816, "learning_rate": 1.7122758620689657e-05, "loss": 0.2928, "step": 4141 }, { "epoch": 14.282758620689656, "grad_norm": 0.8076333999633789, "learning_rate": 1.712689655172414e-05, "loss": 0.301, "step": 4142 }, { "epoch": 14.286206896551723, "grad_norm": 0.8389571309089661, "learning_rate": 1.713103448275862e-05, "loss": 0.3543, "step": 4143 }, { "epoch": 14.289655172413793, "grad_norm": 2.3306000232696533, "learning_rate": 1.7135172413793103e-05, "loss": 0.2927, "step": 4144 }, { "epoch": 14.293103448275861, "grad_norm": 0.8094341158866882, "learning_rate": 1.7139310344827588e-05, "loss": 0.2627, "step": 4145 }, { "epoch": 14.296551724137931, "grad_norm": 0.937420666217804, "learning_rate": 1.7143448275862067e-05, "loss": 0.3352, "step": 4146 }, { "epoch": 14.3, "grad_norm": 1.097713589668274, "learning_rate": 1.7147586206896552e-05, "loss": 0.2779, "step": 4147 }, { "epoch": 14.303448275862069, "grad_norm": 0.898278534412384, "learning_rate": 1.7151724137931034e-05, "loss": 0.2675, "step": 4148 }, { "epoch": 14.306896551724138, "grad_norm": 0.8831939697265625, "learning_rate": 1.715586206896552e-05, "loss": 0.3084, "step": 4149 }, { "epoch": 14.310344827586206, "grad_norm": 1.3500841856002808, "learning_rate": 1.716e-05, "loss": 0.3333, "step": 4150 }, { "epoch": 14.313793103448276, "grad_norm": 1.167346477508545, "learning_rate": 1.7164137931034484e-05, "loss": 0.2815, "step": 4151 }, { "epoch": 14.317241379310344, "grad_norm": 1.064124345779419, "learning_rate": 1.7168275862068966e-05, "loss": 0.3285, "step": 4152 }, { "epoch": 14.320689655172414, "grad_norm": 1.5211207866668701, "learning_rate": 1.717241379310345e-05, "loss": 0.3033, "step": 4153 }, { "epoch": 14.324137931034482, "grad_norm": 0.9542819857597351, "learning_rate": 1.717655172413793e-05, "loss": 0.2983, "step": 4154 }, { "epoch": 14.327586206896552, "grad_norm": 1.326140284538269, "learning_rate": 1.7180689655172415e-05, "loss": 0.3293, "step": 4155 }, { "epoch": 14.331034482758621, "grad_norm": 1.615631103515625, "learning_rate": 1.7184827586206897e-05, "loss": 0.3122, "step": 4156 }, { "epoch": 14.33448275862069, "grad_norm": 1.030387043952942, "learning_rate": 1.718896551724138e-05, "loss": 0.3203, "step": 4157 }, { "epoch": 14.337931034482759, "grad_norm": 1.7263295650482178, "learning_rate": 1.719310344827586e-05, "loss": 0.3151, "step": 4158 }, { "epoch": 14.341379310344827, "grad_norm": 2.6618385314941406, "learning_rate": 1.7197241379310343e-05, "loss": 0.3181, "step": 4159 }, { "epoch": 14.344827586206897, "grad_norm": 1.8291850090026855, "learning_rate": 1.720137931034483e-05, "loss": 0.3836, "step": 4160 }, { "epoch": 14.348275862068965, "grad_norm": 0.7538983821868896, "learning_rate": 1.720551724137931e-05, "loss": 0.3503, "step": 4161 }, { "epoch": 14.351724137931035, "grad_norm": 0.6807737946510315, "learning_rate": 1.7209655172413796e-05, "loss": 0.3033, "step": 4162 }, { "epoch": 14.355172413793104, "grad_norm": 0.7018525004386902, "learning_rate": 1.7213793103448275e-05, "loss": 0.3308, "step": 4163 }, { "epoch": 14.358620689655172, "grad_norm": 0.8206633925437927, "learning_rate": 1.721793103448276e-05, "loss": 0.3542, "step": 4164 }, { "epoch": 14.362068965517242, "grad_norm": 1.5551363229751587, "learning_rate": 1.7222068965517242e-05, "loss": 0.3066, "step": 4165 }, { "epoch": 14.36551724137931, "grad_norm": 0.8463552594184875, "learning_rate": 1.7226206896551728e-05, "loss": 0.3052, "step": 4166 }, { "epoch": 14.36896551724138, "grad_norm": 0.718353807926178, "learning_rate": 1.7230344827586206e-05, "loss": 0.3225, "step": 4167 }, { "epoch": 14.372413793103448, "grad_norm": 0.7300383448600769, "learning_rate": 1.7234482758620688e-05, "loss": 0.2971, "step": 4168 }, { "epoch": 14.375862068965517, "grad_norm": 1.0545650720596313, "learning_rate": 1.7238620689655174e-05, "loss": 0.2816, "step": 4169 }, { "epoch": 14.379310344827585, "grad_norm": 0.9636167883872986, "learning_rate": 1.7242758620689656e-05, "loss": 0.3036, "step": 4170 }, { "epoch": 14.382758620689655, "grad_norm": 1.0122780799865723, "learning_rate": 1.7246896551724138e-05, "loss": 0.2792, "step": 4171 }, { "epoch": 14.386206896551725, "grad_norm": 0.8043379187583923, "learning_rate": 1.725103448275862e-05, "loss": 0.3084, "step": 4172 }, { "epoch": 14.389655172413793, "grad_norm": 1.847799301147461, "learning_rate": 1.7255172413793105e-05, "loss": 0.292, "step": 4173 }, { "epoch": 14.393103448275863, "grad_norm": 1.033096194267273, "learning_rate": 1.7259310344827587e-05, "loss": 0.3059, "step": 4174 }, { "epoch": 14.39655172413793, "grad_norm": 1.106238603591919, "learning_rate": 1.726344827586207e-05, "loss": 0.3126, "step": 4175 }, { "epoch": 14.4, "grad_norm": 0.8882419466972351, "learning_rate": 1.726758620689655e-05, "loss": 0.2975, "step": 4176 }, { "epoch": 14.403448275862068, "grad_norm": 1.0109771490097046, "learning_rate": 1.7271724137931037e-05, "loss": 0.2995, "step": 4177 }, { "epoch": 14.406896551724138, "grad_norm": 0.9801283478736877, "learning_rate": 1.727586206896552e-05, "loss": 0.3086, "step": 4178 }, { "epoch": 14.410344827586206, "grad_norm": 0.998375654220581, "learning_rate": 1.728e-05, "loss": 0.2942, "step": 4179 }, { "epoch": 14.413793103448276, "grad_norm": 1.2293163537979126, "learning_rate": 1.7284137931034483e-05, "loss": 0.2946, "step": 4180 }, { "epoch": 14.417241379310346, "grad_norm": 1.1518784761428833, "learning_rate": 1.7288275862068965e-05, "loss": 0.3414, "step": 4181 }, { "epoch": 14.420689655172414, "grad_norm": 1.2795385122299194, "learning_rate": 1.729241379310345e-05, "loss": 0.3023, "step": 4182 }, { "epoch": 14.424137931034483, "grad_norm": 1.7274311780929565, "learning_rate": 1.729655172413793e-05, "loss": 0.3152, "step": 4183 }, { "epoch": 14.427586206896551, "grad_norm": 1.7022892236709595, "learning_rate": 1.7300689655172414e-05, "loss": 0.4368, "step": 4184 }, { "epoch": 14.431034482758621, "grad_norm": 5.12580680847168, "learning_rate": 1.7304827586206896e-05, "loss": 0.4372, "step": 4185 }, { "epoch": 14.434482758620689, "grad_norm": 0.8882464170455933, "learning_rate": 1.730896551724138e-05, "loss": 0.3532, "step": 4186 }, { "epoch": 14.437931034482759, "grad_norm": 0.9772845506668091, "learning_rate": 1.731310344827586e-05, "loss": 0.2956, "step": 4187 }, { "epoch": 14.441379310344828, "grad_norm": 1.0550768375396729, "learning_rate": 1.7317241379310346e-05, "loss": 0.3175, "step": 4188 }, { "epoch": 14.444827586206896, "grad_norm": 0.7187688946723938, "learning_rate": 1.7321379310344828e-05, "loss": 0.3017, "step": 4189 }, { "epoch": 14.448275862068966, "grad_norm": 0.834663987159729, "learning_rate": 1.7325517241379313e-05, "loss": 0.3026, "step": 4190 }, { "epoch": 14.451724137931034, "grad_norm": 0.9397502541542053, "learning_rate": 1.732965517241379e-05, "loss": 0.3134, "step": 4191 }, { "epoch": 14.455172413793104, "grad_norm": 0.6452609300613403, "learning_rate": 1.7333793103448277e-05, "loss": 0.2948, "step": 4192 }, { "epoch": 14.458620689655172, "grad_norm": 0.8521568775177002, "learning_rate": 1.733793103448276e-05, "loss": 0.3322, "step": 4193 }, { "epoch": 14.462068965517242, "grad_norm": 0.727425754070282, "learning_rate": 1.734206896551724e-05, "loss": 0.2705, "step": 4194 }, { "epoch": 14.46551724137931, "grad_norm": 1.2399868965148926, "learning_rate": 1.7346206896551726e-05, "loss": 0.3401, "step": 4195 }, { "epoch": 14.46896551724138, "grad_norm": 0.9414185881614685, "learning_rate": 1.7350344827586205e-05, "loss": 0.2995, "step": 4196 }, { "epoch": 14.472413793103449, "grad_norm": 0.7327996492385864, "learning_rate": 1.735448275862069e-05, "loss": 0.2884, "step": 4197 }, { "epoch": 14.475862068965517, "grad_norm": 2.3115234375, "learning_rate": 1.7358620689655172e-05, "loss": 0.2634, "step": 4198 }, { "epoch": 14.479310344827587, "grad_norm": 1.3241446018218994, "learning_rate": 1.7362758620689658e-05, "loss": 0.3305, "step": 4199 }, { "epoch": 14.482758620689655, "grad_norm": 0.7656068205833435, "learning_rate": 1.7366896551724137e-05, "loss": 0.3237, "step": 4200 }, { "epoch": 14.486206896551725, "grad_norm": 0.9288299083709717, "learning_rate": 1.7371034482758622e-05, "loss": 0.2996, "step": 4201 }, { "epoch": 14.489655172413793, "grad_norm": 1.9932737350463867, "learning_rate": 1.7375172413793104e-05, "loss": 0.3197, "step": 4202 }, { "epoch": 14.493103448275862, "grad_norm": 2.2983713150024414, "learning_rate": 1.737931034482759e-05, "loss": 0.307, "step": 4203 }, { "epoch": 14.49655172413793, "grad_norm": 0.8504494428634644, "learning_rate": 1.7383448275862068e-05, "loss": 0.291, "step": 4204 }, { "epoch": 14.5, "grad_norm": 0.8508273959159851, "learning_rate": 1.7387586206896553e-05, "loss": 0.2916, "step": 4205 }, { "epoch": 14.50344827586207, "grad_norm": 1.2302919626235962, "learning_rate": 1.7391724137931035e-05, "loss": 0.3029, "step": 4206 }, { "epoch": 14.506896551724138, "grad_norm": 1.0406384468078613, "learning_rate": 1.7395862068965517e-05, "loss": 0.3223, "step": 4207 }, { "epoch": 14.510344827586207, "grad_norm": 1.4936109781265259, "learning_rate": 1.74e-05, "loss": 0.3086, "step": 4208 }, { "epoch": 14.513793103448275, "grad_norm": 1.5220822095870972, "learning_rate": 1.740413793103448e-05, "loss": 0.335, "step": 4209 }, { "epoch": 14.517241379310345, "grad_norm": 4.95164155960083, "learning_rate": 1.7408275862068967e-05, "loss": 0.4517, "step": 4210 }, { "epoch": 14.520689655172413, "grad_norm": 0.5830458402633667, "learning_rate": 1.741241379310345e-05, "loss": 0.3209, "step": 4211 }, { "epoch": 14.524137931034483, "grad_norm": 0.7167479991912842, "learning_rate": 1.741655172413793e-05, "loss": 0.3396, "step": 4212 }, { "epoch": 14.527586206896551, "grad_norm": 0.6710246205329895, "learning_rate": 1.7420689655172413e-05, "loss": 0.2908, "step": 4213 }, { "epoch": 14.53103448275862, "grad_norm": 0.8314470648765564, "learning_rate": 1.74248275862069e-05, "loss": 0.3015, "step": 4214 }, { "epoch": 14.53448275862069, "grad_norm": 0.7942419648170471, "learning_rate": 1.742896551724138e-05, "loss": 0.3178, "step": 4215 }, { "epoch": 14.537931034482758, "grad_norm": 1.1045475006103516, "learning_rate": 1.7433103448275862e-05, "loss": 0.318, "step": 4216 }, { "epoch": 14.541379310344828, "grad_norm": 0.6983554363250732, "learning_rate": 1.7437241379310344e-05, "loss": 0.304, "step": 4217 }, { "epoch": 14.544827586206896, "grad_norm": 1.0372270345687866, "learning_rate": 1.744137931034483e-05, "loss": 0.3189, "step": 4218 }, { "epoch": 14.548275862068966, "grad_norm": 1.8750479221343994, "learning_rate": 1.7445517241379312e-05, "loss": 0.2897, "step": 4219 }, { "epoch": 14.551724137931034, "grad_norm": 0.893807590007782, "learning_rate": 1.744965517241379e-05, "loss": 0.3168, "step": 4220 }, { "epoch": 14.555172413793104, "grad_norm": 1.047914743423462, "learning_rate": 1.7453793103448276e-05, "loss": 0.2822, "step": 4221 }, { "epoch": 14.558620689655172, "grad_norm": 1.075391411781311, "learning_rate": 1.7457931034482758e-05, "loss": 0.3131, "step": 4222 }, { "epoch": 14.562068965517241, "grad_norm": 0.92901611328125, "learning_rate": 1.7462068965517243e-05, "loss": 0.2946, "step": 4223 }, { "epoch": 14.565517241379311, "grad_norm": 0.862028956413269, "learning_rate": 1.7466206896551722e-05, "loss": 0.3188, "step": 4224 }, { "epoch": 14.568965517241379, "grad_norm": 0.8392850756645203, "learning_rate": 1.7470344827586207e-05, "loss": 0.3117, "step": 4225 }, { "epoch": 14.572413793103449, "grad_norm": 0.9723548889160156, "learning_rate": 1.747448275862069e-05, "loss": 0.2893, "step": 4226 }, { "epoch": 14.575862068965517, "grad_norm": 0.7748025059700012, "learning_rate": 1.7478620689655175e-05, "loss": 0.2425, "step": 4227 }, { "epoch": 14.579310344827586, "grad_norm": 0.7350221872329712, "learning_rate": 1.7482758620689657e-05, "loss": 0.2943, "step": 4228 }, { "epoch": 14.582758620689654, "grad_norm": 1.0769339799880981, "learning_rate": 1.748689655172414e-05, "loss": 0.2947, "step": 4229 }, { "epoch": 14.586206896551724, "grad_norm": 1.3465596437454224, "learning_rate": 1.749103448275862e-05, "loss": 0.3044, "step": 4230 }, { "epoch": 14.589655172413792, "grad_norm": 0.8672372698783875, "learning_rate": 1.7495172413793106e-05, "loss": 0.3257, "step": 4231 }, { "epoch": 14.593103448275862, "grad_norm": 0.8843567967414856, "learning_rate": 1.7499310344827588e-05, "loss": 0.2847, "step": 4232 }, { "epoch": 14.596551724137932, "grad_norm": 1.718654751777649, "learning_rate": 1.7503448275862067e-05, "loss": 0.3324, "step": 4233 }, { "epoch": 14.6, "grad_norm": 1.4447802305221558, "learning_rate": 1.7507586206896552e-05, "loss": 0.3326, "step": 4234 }, { "epoch": 14.60344827586207, "grad_norm": 1.650471806526184, "learning_rate": 1.7511724137931034e-05, "loss": 0.4228, "step": 4235 }, { "epoch": 14.606896551724137, "grad_norm": 0.8412590622901917, "learning_rate": 1.751586206896552e-05, "loss": 0.3846, "step": 4236 }, { "epoch": 14.610344827586207, "grad_norm": 0.9063007831573486, "learning_rate": 1.7519999999999998e-05, "loss": 0.3225, "step": 4237 }, { "epoch": 14.613793103448275, "grad_norm": 1.0875996351242065, "learning_rate": 1.7524137931034484e-05, "loss": 0.3104, "step": 4238 }, { "epoch": 14.617241379310345, "grad_norm": 0.7299657464027405, "learning_rate": 1.7528275862068966e-05, "loss": 0.3149, "step": 4239 }, { "epoch": 14.620689655172415, "grad_norm": 0.7426931858062744, "learning_rate": 1.753241379310345e-05, "loss": 0.3168, "step": 4240 }, { "epoch": 14.624137931034483, "grad_norm": 0.9645451903343201, "learning_rate": 1.753655172413793e-05, "loss": 0.313, "step": 4241 }, { "epoch": 14.627586206896552, "grad_norm": 0.814951479434967, "learning_rate": 1.7540689655172415e-05, "loss": 0.3031, "step": 4242 }, { "epoch": 14.63103448275862, "grad_norm": 0.7481603622436523, "learning_rate": 1.7544827586206897e-05, "loss": 0.3285, "step": 4243 }, { "epoch": 14.63448275862069, "grad_norm": 1.0489627122879028, "learning_rate": 1.7548965517241383e-05, "loss": 0.2852, "step": 4244 }, { "epoch": 14.637931034482758, "grad_norm": 0.8762294054031372, "learning_rate": 1.755310344827586e-05, "loss": 0.3138, "step": 4245 }, { "epoch": 14.641379310344828, "grad_norm": 1.0365653038024902, "learning_rate": 1.7557241379310343e-05, "loss": 0.3008, "step": 4246 }, { "epoch": 14.644827586206896, "grad_norm": 1.326570987701416, "learning_rate": 1.756137931034483e-05, "loss": 0.3126, "step": 4247 }, { "epoch": 14.648275862068965, "grad_norm": 0.9206026196479797, "learning_rate": 1.756551724137931e-05, "loss": 0.2922, "step": 4248 }, { "epoch": 14.651724137931035, "grad_norm": 1.7360583543777466, "learning_rate": 1.7569655172413793e-05, "loss": 0.2931, "step": 4249 }, { "epoch": 14.655172413793103, "grad_norm": 1.055824875831604, "learning_rate": 1.7573793103448275e-05, "loss": 0.3178, "step": 4250 }, { "epoch": 14.658620689655173, "grad_norm": 1.0141468048095703, "learning_rate": 1.757793103448276e-05, "loss": 0.283, "step": 4251 }, { "epoch": 14.662068965517241, "grad_norm": 1.604121208190918, "learning_rate": 1.7582068965517242e-05, "loss": 0.3028, "step": 4252 }, { "epoch": 14.66551724137931, "grad_norm": 1.146384596824646, "learning_rate": 1.7586206896551724e-05, "loss": 0.2906, "step": 4253 }, { "epoch": 14.668965517241379, "grad_norm": 0.9156032204627991, "learning_rate": 1.7590344827586206e-05, "loss": 0.3267, "step": 4254 }, { "epoch": 14.672413793103448, "grad_norm": 0.9125036597251892, "learning_rate": 1.759448275862069e-05, "loss": 0.2846, "step": 4255 }, { "epoch": 14.675862068965518, "grad_norm": 1.293255090713501, "learning_rate": 1.7598620689655174e-05, "loss": 0.2993, "step": 4256 }, { "epoch": 14.679310344827586, "grad_norm": 1.1992416381835938, "learning_rate": 1.7602758620689656e-05, "loss": 0.2942, "step": 4257 }, { "epoch": 14.682758620689656, "grad_norm": 1.7519687414169312, "learning_rate": 1.7606896551724138e-05, "loss": 0.3078, "step": 4258 }, { "epoch": 14.686206896551724, "grad_norm": 1.646541953086853, "learning_rate": 1.761103448275862e-05, "loss": 0.3367, "step": 4259 }, { "epoch": 14.689655172413794, "grad_norm": 2.4698641300201416, "learning_rate": 1.7615172413793105e-05, "loss": 0.4759, "step": 4260 }, { "epoch": 14.693103448275862, "grad_norm": 0.787041962146759, "learning_rate": 1.7619310344827587e-05, "loss": 0.3107, "step": 4261 }, { "epoch": 14.696551724137931, "grad_norm": 0.7262738347053528, "learning_rate": 1.762344827586207e-05, "loss": 0.3334, "step": 4262 }, { "epoch": 14.7, "grad_norm": 0.6954865455627441, "learning_rate": 1.762758620689655e-05, "loss": 0.307, "step": 4263 }, { "epoch": 14.703448275862069, "grad_norm": 1.282780408859253, "learning_rate": 1.7631724137931036e-05, "loss": 0.2968, "step": 4264 }, { "epoch": 14.706896551724139, "grad_norm": 0.816312849521637, "learning_rate": 1.763586206896552e-05, "loss": 0.3082, "step": 4265 }, { "epoch": 14.710344827586207, "grad_norm": 0.689563512802124, "learning_rate": 1.764e-05, "loss": 0.3038, "step": 4266 }, { "epoch": 14.713793103448277, "grad_norm": 0.935608446598053, "learning_rate": 1.7644137931034483e-05, "loss": 0.3387, "step": 4267 }, { "epoch": 14.717241379310344, "grad_norm": 1.071913480758667, "learning_rate": 1.7648275862068968e-05, "loss": 0.3127, "step": 4268 }, { "epoch": 14.720689655172414, "grad_norm": 1.1550700664520264, "learning_rate": 1.765241379310345e-05, "loss": 0.2832, "step": 4269 }, { "epoch": 14.724137931034482, "grad_norm": 0.7970525622367859, "learning_rate": 1.7656551724137932e-05, "loss": 0.2939, "step": 4270 }, { "epoch": 14.727586206896552, "grad_norm": 1.393227219581604, "learning_rate": 1.7660689655172414e-05, "loss": 0.3017, "step": 4271 }, { "epoch": 14.73103448275862, "grad_norm": 0.9809888601303101, "learning_rate": 1.7664827586206896e-05, "loss": 0.2767, "step": 4272 }, { "epoch": 14.73448275862069, "grad_norm": 0.9269981980323792, "learning_rate": 1.766896551724138e-05, "loss": 0.2927, "step": 4273 }, { "epoch": 14.73793103448276, "grad_norm": 0.8753629922866821, "learning_rate": 1.767310344827586e-05, "loss": 0.291, "step": 4274 }, { "epoch": 14.741379310344827, "grad_norm": 0.9382573366165161, "learning_rate": 1.7677241379310345e-05, "loss": 0.289, "step": 4275 }, { "epoch": 14.744827586206897, "grad_norm": 1.0123049020767212, "learning_rate": 1.7681379310344827e-05, "loss": 0.2957, "step": 4276 }, { "epoch": 14.748275862068965, "grad_norm": 0.8486573100090027, "learning_rate": 1.7685517241379313e-05, "loss": 0.3148, "step": 4277 }, { "epoch": 14.751724137931035, "grad_norm": 0.9981033205986023, "learning_rate": 1.768965517241379e-05, "loss": 0.2692, "step": 4278 }, { "epoch": 14.755172413793103, "grad_norm": 1.1099070310592651, "learning_rate": 1.7693793103448277e-05, "loss": 0.2724, "step": 4279 }, { "epoch": 14.758620689655173, "grad_norm": 1.09809410572052, "learning_rate": 1.769793103448276e-05, "loss": 0.2819, "step": 4280 }, { "epoch": 14.76206896551724, "grad_norm": 1.202150583267212, "learning_rate": 1.7702068965517244e-05, "loss": 0.3095, "step": 4281 }, { "epoch": 14.76551724137931, "grad_norm": 1.2171094417572021, "learning_rate": 1.7706206896551723e-05, "loss": 0.2826, "step": 4282 }, { "epoch": 14.76896551724138, "grad_norm": 1.2903356552124023, "learning_rate": 1.771034482758621e-05, "loss": 0.322, "step": 4283 }, { "epoch": 14.772413793103448, "grad_norm": 1.134581208229065, "learning_rate": 1.771448275862069e-05, "loss": 0.3193, "step": 4284 }, { "epoch": 14.775862068965518, "grad_norm": 3.639277935028076, "learning_rate": 1.7718620689655172e-05, "loss": 0.4614, "step": 4285 }, { "epoch": 14.779310344827586, "grad_norm": 0.9165431261062622, "learning_rate": 1.7722758620689654e-05, "loss": 0.3301, "step": 4286 }, { "epoch": 14.782758620689656, "grad_norm": 0.7634367346763611, "learning_rate": 1.7726896551724136e-05, "loss": 0.3275, "step": 4287 }, { "epoch": 14.786206896551723, "grad_norm": 0.7133443355560303, "learning_rate": 1.7731034482758622e-05, "loss": 0.3462, "step": 4288 }, { "epoch": 14.789655172413793, "grad_norm": 0.6396775245666504, "learning_rate": 1.7735172413793104e-05, "loss": 0.3046, "step": 4289 }, { "epoch": 14.793103448275861, "grad_norm": 0.808414101600647, "learning_rate": 1.7739310344827586e-05, "loss": 0.3237, "step": 4290 }, { "epoch": 14.796551724137931, "grad_norm": 0.93034428358078, "learning_rate": 1.7743448275862068e-05, "loss": 0.3298, "step": 4291 }, { "epoch": 14.8, "grad_norm": 0.7900945544242859, "learning_rate": 1.7747586206896553e-05, "loss": 0.3421, "step": 4292 }, { "epoch": 14.803448275862069, "grad_norm": 0.7460189461708069, "learning_rate": 1.7751724137931035e-05, "loss": 0.2821, "step": 4293 }, { "epoch": 14.806896551724138, "grad_norm": 0.5631550550460815, "learning_rate": 1.775586206896552e-05, "loss": 0.2935, "step": 4294 }, { "epoch": 14.810344827586206, "grad_norm": 0.6285091042518616, "learning_rate": 1.776e-05, "loss": 0.3031, "step": 4295 }, { "epoch": 14.813793103448276, "grad_norm": 0.8240830302238464, "learning_rate": 1.7764137931034485e-05, "loss": 0.2885, "step": 4296 }, { "epoch": 14.817241379310344, "grad_norm": 0.7088080644607544, "learning_rate": 1.7768275862068967e-05, "loss": 0.2868, "step": 4297 }, { "epoch": 14.820689655172414, "grad_norm": 1.805501103401184, "learning_rate": 1.777241379310345e-05, "loss": 0.317, "step": 4298 }, { "epoch": 14.824137931034482, "grad_norm": 0.9099631309509277, "learning_rate": 1.777655172413793e-05, "loss": 0.3138, "step": 4299 }, { "epoch": 14.827586206896552, "grad_norm": 0.7937362194061279, "learning_rate": 1.7780689655172413e-05, "loss": 0.2632, "step": 4300 }, { "epoch": 14.831034482758621, "grad_norm": 0.8622856736183167, "learning_rate": 1.7784827586206898e-05, "loss": 0.2995, "step": 4301 }, { "epoch": 14.83448275862069, "grad_norm": 0.8905354738235474, "learning_rate": 1.778896551724138e-05, "loss": 0.325, "step": 4302 }, { "epoch": 14.837931034482759, "grad_norm": 1.1880652904510498, "learning_rate": 1.7793103448275862e-05, "loss": 0.3071, "step": 4303 }, { "epoch": 14.841379310344827, "grad_norm": 0.8788727521896362, "learning_rate": 1.7797241379310344e-05, "loss": 0.3051, "step": 4304 }, { "epoch": 14.844827586206897, "grad_norm": 1.1706558465957642, "learning_rate": 1.780137931034483e-05, "loss": 0.2923, "step": 4305 }, { "epoch": 14.848275862068965, "grad_norm": 1.100715160369873, "learning_rate": 1.7805517241379312e-05, "loss": 0.3346, "step": 4306 }, { "epoch": 14.851724137931035, "grad_norm": 1.0929813385009766, "learning_rate": 1.7809655172413794e-05, "loss": 0.2865, "step": 4307 }, { "epoch": 14.855172413793104, "grad_norm": 1.0299581289291382, "learning_rate": 1.7813793103448276e-05, "loss": 0.3912, "step": 4308 }, { "epoch": 14.858620689655172, "grad_norm": 1.8122975826263428, "learning_rate": 1.781793103448276e-05, "loss": 0.3228, "step": 4309 }, { "epoch": 14.862068965517242, "grad_norm": 2.5564754009246826, "learning_rate": 1.7822068965517243e-05, "loss": 0.4545, "step": 4310 }, { "epoch": 14.86551724137931, "grad_norm": 0.7751657366752625, "learning_rate": 1.7826206896551722e-05, "loss": 0.3404, "step": 4311 }, { "epoch": 14.86896551724138, "grad_norm": 1.746566891670227, "learning_rate": 1.7830344827586207e-05, "loss": 0.2968, "step": 4312 }, { "epoch": 14.872413793103448, "grad_norm": 0.8748902082443237, "learning_rate": 1.783448275862069e-05, "loss": 0.3025, "step": 4313 }, { "epoch": 14.875862068965517, "grad_norm": 0.7253442406654358, "learning_rate": 1.7838620689655175e-05, "loss": 0.3269, "step": 4314 }, { "epoch": 14.879310344827585, "grad_norm": 0.8738021850585938, "learning_rate": 1.7842758620689653e-05, "loss": 0.2871, "step": 4315 }, { "epoch": 14.882758620689655, "grad_norm": 0.6264983415603638, "learning_rate": 1.784689655172414e-05, "loss": 0.2848, "step": 4316 }, { "epoch": 14.886206896551725, "grad_norm": 0.7645626664161682, "learning_rate": 1.785103448275862e-05, "loss": 0.3011, "step": 4317 }, { "epoch": 14.889655172413793, "grad_norm": 0.8220482468605042, "learning_rate": 1.7855172413793106e-05, "loss": 0.3156, "step": 4318 }, { "epoch": 14.893103448275863, "grad_norm": 0.9691508412361145, "learning_rate": 1.7859310344827585e-05, "loss": 0.28, "step": 4319 }, { "epoch": 14.89655172413793, "grad_norm": 0.7653795480728149, "learning_rate": 1.786344827586207e-05, "loss": 0.3009, "step": 4320 }, { "epoch": 14.9, "grad_norm": 1.041090726852417, "learning_rate": 1.7867586206896552e-05, "loss": 0.3462, "step": 4321 }, { "epoch": 14.903448275862068, "grad_norm": 0.8336577415466309, "learning_rate": 1.7871724137931038e-05, "loss": 0.3082, "step": 4322 }, { "epoch": 14.906896551724138, "grad_norm": 0.9069688320159912, "learning_rate": 1.7875862068965516e-05, "loss": 0.3037, "step": 4323 }, { "epoch": 14.910344827586208, "grad_norm": 0.8027486205101013, "learning_rate": 1.7879999999999998e-05, "loss": 0.3019, "step": 4324 }, { "epoch": 14.913793103448276, "grad_norm": 1.6110942363739014, "learning_rate": 1.7884137931034484e-05, "loss": 0.2927, "step": 4325 }, { "epoch": 14.917241379310346, "grad_norm": 0.7868813276290894, "learning_rate": 1.7888275862068966e-05, "loss": 0.2985, "step": 4326 }, { "epoch": 14.920689655172414, "grad_norm": 0.7446289658546448, "learning_rate": 1.789241379310345e-05, "loss": 0.2414, "step": 4327 }, { "epoch": 14.924137931034483, "grad_norm": 1.5432872772216797, "learning_rate": 1.789655172413793e-05, "loss": 0.3394, "step": 4328 }, { "epoch": 14.927586206896551, "grad_norm": 1.0140531063079834, "learning_rate": 1.7900689655172415e-05, "loss": 0.2856, "step": 4329 }, { "epoch": 14.931034482758621, "grad_norm": 1.1769652366638184, "learning_rate": 1.7904827586206897e-05, "loss": 0.3126, "step": 4330 }, { "epoch": 14.934482758620689, "grad_norm": 3.393345832824707, "learning_rate": 1.7908965517241383e-05, "loss": 0.3228, "step": 4331 }, { "epoch": 14.937931034482759, "grad_norm": 1.1347107887268066, "learning_rate": 1.791310344827586e-05, "loss": 0.2972, "step": 4332 }, { "epoch": 14.941379310344828, "grad_norm": 1.4058579206466675, "learning_rate": 1.7917241379310347e-05, "loss": 0.3324, "step": 4333 }, { "epoch": 14.944827586206896, "grad_norm": 1.4265273809432983, "learning_rate": 1.792137931034483e-05, "loss": 0.3764, "step": 4334 }, { "epoch": 14.948275862068966, "grad_norm": 2.5410354137420654, "learning_rate": 1.792551724137931e-05, "loss": 0.483, "step": 4335 }, { "epoch": 14.951724137931034, "grad_norm": 0.9122229218482971, "learning_rate": 1.7929655172413793e-05, "loss": 0.3754, "step": 4336 }, { "epoch": 14.955172413793104, "grad_norm": 1.0395612716674805, "learning_rate": 1.7933793103448275e-05, "loss": 0.3507, "step": 4337 }, { "epoch": 14.958620689655172, "grad_norm": 1.0135130882263184, "learning_rate": 1.793793103448276e-05, "loss": 0.3023, "step": 4338 }, { "epoch": 14.962068965517242, "grad_norm": 1.0594862699508667, "learning_rate": 1.7942068965517242e-05, "loss": 0.2986, "step": 4339 }, { "epoch": 14.96551724137931, "grad_norm": 1.892771601676941, "learning_rate": 1.7946206896551724e-05, "loss": 0.3019, "step": 4340 }, { "epoch": 14.96896551724138, "grad_norm": 0.7960776090621948, "learning_rate": 1.7950344827586206e-05, "loss": 0.3146, "step": 4341 }, { "epoch": 14.972413793103449, "grad_norm": 0.7800519466400146, "learning_rate": 1.795448275862069e-05, "loss": 0.3141, "step": 4342 }, { "epoch": 14.975862068965517, "grad_norm": 1.013863205909729, "learning_rate": 1.7958620689655173e-05, "loss": 0.2752, "step": 4343 }, { "epoch": 14.979310344827587, "grad_norm": 1.836362600326538, "learning_rate": 1.7962758620689656e-05, "loss": 0.3152, "step": 4344 }, { "epoch": 14.982758620689655, "grad_norm": 1.1773546934127808, "learning_rate": 1.7966896551724138e-05, "loss": 0.3098, "step": 4345 }, { "epoch": 14.986206896551725, "grad_norm": 1.1006020307540894, "learning_rate": 1.7971034482758623e-05, "loss": 0.3144, "step": 4346 }, { "epoch": 14.989655172413793, "grad_norm": 1.0360569953918457, "learning_rate": 1.7975172413793105e-05, "loss": 0.3212, "step": 4347 }, { "epoch": 14.993103448275862, "grad_norm": 0.9726084470748901, "learning_rate": 1.7979310344827587e-05, "loss": 0.3012, "step": 4348 }, { "epoch": 14.99655172413793, "grad_norm": 1.7320609092712402, "learning_rate": 1.798344827586207e-05, "loss": 0.3596, "step": 4349 }, { "epoch": 15.0, "grad_norm": 2.3274900913238525, "learning_rate": 1.798758620689655e-05, "loss": 0.429, "step": 4350 }, { "epoch": 15.00344827586207, "grad_norm": 2.050844669342041, "learning_rate": 1.7991724137931036e-05, "loss": 0.3553, "step": 4351 }, { "epoch": 15.006896551724138, "grad_norm": 0.9053387641906738, "learning_rate": 1.7995862068965515e-05, "loss": 0.3355, "step": 4352 }, { "epoch": 15.010344827586207, "grad_norm": 0.7011533379554749, "learning_rate": 1.8e-05, "loss": 0.2908, "step": 4353 }, { "epoch": 15.013793103448275, "grad_norm": 0.7819290161132812, "learning_rate": 1.8004137931034482e-05, "loss": 0.2988, "step": 4354 }, { "epoch": 15.017241379310345, "grad_norm": 0.6570120453834534, "learning_rate": 1.8008275862068968e-05, "loss": 0.3004, "step": 4355 }, { "epoch": 15.020689655172413, "grad_norm": 0.8702442049980164, "learning_rate": 1.8012413793103447e-05, "loss": 0.2808, "step": 4356 }, { "epoch": 15.024137931034483, "grad_norm": 0.650641143321991, "learning_rate": 1.8016551724137932e-05, "loss": 0.2668, "step": 4357 }, { "epoch": 15.027586206896551, "grad_norm": 0.733970046043396, "learning_rate": 1.8020689655172414e-05, "loss": 0.2989, "step": 4358 }, { "epoch": 15.03103448275862, "grad_norm": 1.3055224418640137, "learning_rate": 1.80248275862069e-05, "loss": 0.3156, "step": 4359 }, { "epoch": 15.03448275862069, "grad_norm": 1.2661601305007935, "learning_rate": 1.802896551724138e-05, "loss": 0.296, "step": 4360 }, { "epoch": 15.037931034482758, "grad_norm": 0.9802356958389282, "learning_rate": 1.803310344827586e-05, "loss": 0.288, "step": 4361 }, { "epoch": 15.041379310344828, "grad_norm": 0.8515610098838806, "learning_rate": 1.8037241379310345e-05, "loss": 0.2824, "step": 4362 }, { "epoch": 15.044827586206896, "grad_norm": 1.0031845569610596, "learning_rate": 1.8041379310344827e-05, "loss": 0.2998, "step": 4363 }, { "epoch": 15.048275862068966, "grad_norm": 0.8964065313339233, "learning_rate": 1.8045517241379313e-05, "loss": 0.274, "step": 4364 }, { "epoch": 15.051724137931034, "grad_norm": 0.9439828991889954, "learning_rate": 1.804965517241379e-05, "loss": 0.2817, "step": 4365 }, { "epoch": 15.055172413793104, "grad_norm": 1.0706357955932617, "learning_rate": 1.8053793103448277e-05, "loss": 0.2907, "step": 4366 }, { "epoch": 15.058620689655172, "grad_norm": 1.56211256980896, "learning_rate": 1.805793103448276e-05, "loss": 0.255, "step": 4367 }, { "epoch": 15.062068965517241, "grad_norm": 0.9058525562286377, "learning_rate": 1.8062068965517244e-05, "loss": 0.283, "step": 4368 }, { "epoch": 15.065517241379311, "grad_norm": 1.1052519083023071, "learning_rate": 1.8066206896551723e-05, "loss": 0.3237, "step": 4369 }, { "epoch": 15.068965517241379, "grad_norm": 3.1604557037353516, "learning_rate": 1.807034482758621e-05, "loss": 0.3065, "step": 4370 }, { "epoch": 15.072413793103449, "grad_norm": 1.279903769493103, "learning_rate": 1.807448275862069e-05, "loss": 0.2905, "step": 4371 }, { "epoch": 15.075862068965517, "grad_norm": 1.3844871520996094, "learning_rate": 1.8078620689655176e-05, "loss": 0.2946, "step": 4372 }, { "epoch": 15.079310344827586, "grad_norm": 0.8532236218452454, "learning_rate": 1.8082758620689654e-05, "loss": 0.3004, "step": 4373 }, { "epoch": 15.082758620689654, "grad_norm": 0.9027634859085083, "learning_rate": 1.8086896551724136e-05, "loss": 0.3434, "step": 4374 }, { "epoch": 15.086206896551724, "grad_norm": 1.5172879695892334, "learning_rate": 1.8091034482758622e-05, "loss": 0.4091, "step": 4375 }, { "epoch": 15.089655172413794, "grad_norm": 0.7321237921714783, "learning_rate": 1.8095172413793104e-05, "loss": 0.3595, "step": 4376 }, { "epoch": 15.093103448275862, "grad_norm": 0.645847499370575, "learning_rate": 1.8099310344827586e-05, "loss": 0.3, "step": 4377 }, { "epoch": 15.096551724137932, "grad_norm": 0.8797215223312378, "learning_rate": 1.8103448275862068e-05, "loss": 0.3108, "step": 4378 }, { "epoch": 15.1, "grad_norm": 0.6645488739013672, "learning_rate": 1.8107586206896553e-05, "loss": 0.2955, "step": 4379 }, { "epoch": 15.10344827586207, "grad_norm": 0.7820160388946533, "learning_rate": 1.8111724137931035e-05, "loss": 0.3182, "step": 4380 }, { "epoch": 15.106896551724137, "grad_norm": 1.2536712884902954, "learning_rate": 1.8115862068965517e-05, "loss": 0.2792, "step": 4381 }, { "epoch": 15.110344827586207, "grad_norm": 0.7231913805007935, "learning_rate": 1.812e-05, "loss": 0.3067, "step": 4382 }, { "epoch": 15.113793103448275, "grad_norm": 0.6833490133285522, "learning_rate": 1.8124137931034485e-05, "loss": 0.284, "step": 4383 }, { "epoch": 15.117241379310345, "grad_norm": 1.0343453884124756, "learning_rate": 1.8128275862068967e-05, "loss": 0.2894, "step": 4384 }, { "epoch": 15.120689655172415, "grad_norm": 0.9249205589294434, "learning_rate": 1.813241379310345e-05, "loss": 0.2542, "step": 4385 }, { "epoch": 15.124137931034483, "grad_norm": 0.7146588563919067, "learning_rate": 1.813655172413793e-05, "loss": 0.2966, "step": 4386 }, { "epoch": 15.127586206896552, "grad_norm": 0.9278146624565125, "learning_rate": 1.8140689655172413e-05, "loss": 0.2825, "step": 4387 }, { "epoch": 15.13103448275862, "grad_norm": 1.0316206216812134, "learning_rate": 1.8144827586206898e-05, "loss": 0.2557, "step": 4388 }, { "epoch": 15.13448275862069, "grad_norm": 2.7044334411621094, "learning_rate": 1.8148965517241377e-05, "loss": 0.3289, "step": 4389 }, { "epoch": 15.137931034482758, "grad_norm": 0.8685182332992554, "learning_rate": 1.8153103448275862e-05, "loss": 0.3044, "step": 4390 }, { "epoch": 15.141379310344828, "grad_norm": 1.1387954950332642, "learning_rate": 1.8157241379310344e-05, "loss": 0.2874, "step": 4391 }, { "epoch": 15.144827586206896, "grad_norm": 1.012197732925415, "learning_rate": 1.816137931034483e-05, "loss": 0.2727, "step": 4392 }, { "epoch": 15.148275862068965, "grad_norm": 0.773809015750885, "learning_rate": 1.816551724137931e-05, "loss": 0.2885, "step": 4393 }, { "epoch": 15.151724137931035, "grad_norm": 1.7589980363845825, "learning_rate": 1.8169655172413794e-05, "loss": 0.3027, "step": 4394 }, { "epoch": 15.155172413793103, "grad_norm": 0.9882201552391052, "learning_rate": 1.8173793103448276e-05, "loss": 0.2831, "step": 4395 }, { "epoch": 15.158620689655173, "grad_norm": 0.9851500391960144, "learning_rate": 1.817793103448276e-05, "loss": 0.3209, "step": 4396 }, { "epoch": 15.162068965517241, "grad_norm": 1.0522243976593018, "learning_rate": 1.8182068965517243e-05, "loss": 0.2682, "step": 4397 }, { "epoch": 15.16551724137931, "grad_norm": 1.212569236755371, "learning_rate": 1.8186206896551725e-05, "loss": 0.3275, "step": 4398 }, { "epoch": 15.168965517241379, "grad_norm": 2.0471904277801514, "learning_rate": 1.8190344827586207e-05, "loss": 0.3756, "step": 4399 }, { "epoch": 15.172413793103448, "grad_norm": 1.9207366704940796, "learning_rate": 1.819448275862069e-05, "loss": 0.4508, "step": 4400 }, { "epoch": 15.175862068965516, "grad_norm": 0.7317143678665161, "learning_rate": 1.8198620689655175e-05, "loss": 0.3924, "step": 4401 }, { "epoch": 15.179310344827586, "grad_norm": 0.9090650081634521, "learning_rate": 1.8202758620689653e-05, "loss": 0.2841, "step": 4402 }, { "epoch": 15.182758620689656, "grad_norm": 0.5483998656272888, "learning_rate": 1.820689655172414e-05, "loss": 0.3057, "step": 4403 }, { "epoch": 15.186206896551724, "grad_norm": 0.7688605189323425, "learning_rate": 1.821103448275862e-05, "loss": 0.3128, "step": 4404 }, { "epoch": 15.189655172413794, "grad_norm": 1.170863151550293, "learning_rate": 1.8215172413793106e-05, "loss": 0.2886, "step": 4405 }, { "epoch": 15.193103448275862, "grad_norm": 1.180886149406433, "learning_rate": 1.8219310344827585e-05, "loss": 0.2884, "step": 4406 }, { "epoch": 15.196551724137931, "grad_norm": 1.2345885038375854, "learning_rate": 1.822344827586207e-05, "loss": 0.3254, "step": 4407 }, { "epoch": 15.2, "grad_norm": 0.9183626174926758, "learning_rate": 1.8227586206896552e-05, "loss": 0.3035, "step": 4408 }, { "epoch": 15.203448275862069, "grad_norm": 0.8535701632499695, "learning_rate": 1.8231724137931038e-05, "loss": 0.3067, "step": 4409 }, { "epoch": 15.206896551724139, "grad_norm": 7.208155155181885, "learning_rate": 1.8235862068965516e-05, "loss": 0.2844, "step": 4410 }, { "epoch": 15.210344827586207, "grad_norm": 1.4345356225967407, "learning_rate": 1.824e-05, "loss": 0.3091, "step": 4411 }, { "epoch": 15.213793103448277, "grad_norm": 0.9334778189659119, "learning_rate": 1.8244137931034484e-05, "loss": 0.3069, "step": 4412 }, { "epoch": 15.217241379310344, "grad_norm": 0.9832833409309387, "learning_rate": 1.8248275862068966e-05, "loss": 0.2904, "step": 4413 }, { "epoch": 15.220689655172414, "grad_norm": 0.8100156188011169, "learning_rate": 1.8252413793103448e-05, "loss": 0.2945, "step": 4414 }, { "epoch": 15.224137931034482, "grad_norm": 1.10850191116333, "learning_rate": 1.825655172413793e-05, "loss": 0.3088, "step": 4415 }, { "epoch": 15.227586206896552, "grad_norm": 1.2242532968521118, "learning_rate": 1.8260689655172415e-05, "loss": 0.3116, "step": 4416 }, { "epoch": 15.23103448275862, "grad_norm": 1.1708347797393799, "learning_rate": 1.8264827586206897e-05, "loss": 0.27, "step": 4417 }, { "epoch": 15.23448275862069, "grad_norm": 1.7674946784973145, "learning_rate": 1.826896551724138e-05, "loss": 0.3237, "step": 4418 }, { "epoch": 15.23793103448276, "grad_norm": 0.9863287806510925, "learning_rate": 1.827310344827586e-05, "loss": 0.2912, "step": 4419 }, { "epoch": 15.241379310344827, "grad_norm": 1.944066047668457, "learning_rate": 1.8277241379310346e-05, "loss": 0.3006, "step": 4420 }, { "epoch": 15.244827586206897, "grad_norm": 1.2925043106079102, "learning_rate": 1.828137931034483e-05, "loss": 0.3857, "step": 4421 }, { "epoch": 15.248275862068965, "grad_norm": 1.485141634941101, "learning_rate": 1.828551724137931e-05, "loss": 0.2828, "step": 4422 }, { "epoch": 15.251724137931035, "grad_norm": 0.9907310605049133, "learning_rate": 1.8289655172413793e-05, "loss": 0.3268, "step": 4423 }, { "epoch": 15.255172413793103, "grad_norm": 1.8547242879867554, "learning_rate": 1.8293793103448278e-05, "loss": 0.3486, "step": 4424 }, { "epoch": 15.258620689655173, "grad_norm": 1.8776291608810425, "learning_rate": 1.829793103448276e-05, "loss": 0.4876, "step": 4425 }, { "epoch": 15.26206896551724, "grad_norm": 0.9209262132644653, "learning_rate": 1.830206896551724e-05, "loss": 0.3808, "step": 4426 }, { "epoch": 15.26551724137931, "grad_norm": 0.8635491728782654, "learning_rate": 1.8306206896551724e-05, "loss": 0.3151, "step": 4427 }, { "epoch": 15.26896551724138, "grad_norm": 0.7248004674911499, "learning_rate": 1.8310344827586206e-05, "loss": 0.3474, "step": 4428 }, { "epoch": 15.272413793103448, "grad_norm": 1.0222853422164917, "learning_rate": 1.831448275862069e-05, "loss": 0.3475, "step": 4429 }, { "epoch": 15.275862068965518, "grad_norm": 0.8825634121894836, "learning_rate": 1.8318620689655173e-05, "loss": 0.2912, "step": 4430 }, { "epoch": 15.279310344827586, "grad_norm": 1.5782747268676758, "learning_rate": 1.8322758620689655e-05, "loss": 0.32, "step": 4431 }, { "epoch": 15.282758620689656, "grad_norm": 18.253347396850586, "learning_rate": 1.8326896551724137e-05, "loss": 0.3023, "step": 4432 }, { "epoch": 15.286206896551723, "grad_norm": 0.9084608554840088, "learning_rate": 1.8331034482758623e-05, "loss": 0.319, "step": 4433 }, { "epoch": 15.289655172413793, "grad_norm": 1.4176379442214966, "learning_rate": 1.8335172413793105e-05, "loss": 0.2882, "step": 4434 }, { "epoch": 15.293103448275861, "grad_norm": 0.6655446887016296, "learning_rate": 1.8339310344827587e-05, "loss": 0.285, "step": 4435 }, { "epoch": 15.296551724137931, "grad_norm": 0.8355607390403748, "learning_rate": 1.834344827586207e-05, "loss": 0.3067, "step": 4436 }, { "epoch": 15.3, "grad_norm": 1.2538847923278809, "learning_rate": 1.8347586206896554e-05, "loss": 0.3323, "step": 4437 }, { "epoch": 15.303448275862069, "grad_norm": 0.7582781314849854, "learning_rate": 1.8351724137931036e-05, "loss": 0.2968, "step": 4438 }, { "epoch": 15.306896551724138, "grad_norm": 1.0490608215332031, "learning_rate": 1.8355862068965515e-05, "loss": 0.2881, "step": 4439 }, { "epoch": 15.310344827586206, "grad_norm": 11.602336883544922, "learning_rate": 1.836e-05, "loss": 0.2933, "step": 4440 }, { "epoch": 15.313793103448276, "grad_norm": 1.0411760807037354, "learning_rate": 1.8364137931034482e-05, "loss": 0.2889, "step": 4441 }, { "epoch": 15.317241379310344, "grad_norm": 0.9904917478561401, "learning_rate": 1.8368275862068968e-05, "loss": 0.2865, "step": 4442 }, { "epoch": 15.320689655172414, "grad_norm": 1.599798321723938, "learning_rate": 1.8372413793103446e-05, "loss": 0.2576, "step": 4443 }, { "epoch": 15.324137931034482, "grad_norm": 0.9772042632102966, "learning_rate": 1.8376551724137932e-05, "loss": 0.2781, "step": 4444 }, { "epoch": 15.327586206896552, "grad_norm": 1.6389139890670776, "learning_rate": 1.8380689655172414e-05, "loss": 0.3178, "step": 4445 }, { "epoch": 15.331034482758621, "grad_norm": 1.5506374835968018, "learning_rate": 1.83848275862069e-05, "loss": 0.318, "step": 4446 }, { "epoch": 15.33448275862069, "grad_norm": 1.7019762992858887, "learning_rate": 1.8388965517241378e-05, "loss": 0.3012, "step": 4447 }, { "epoch": 15.337931034482759, "grad_norm": 1.1058106422424316, "learning_rate": 1.8393103448275863e-05, "loss": 0.3253, "step": 4448 }, { "epoch": 15.341379310344827, "grad_norm": 1.3118807077407837, "learning_rate": 1.8397241379310345e-05, "loss": 0.3147, "step": 4449 }, { "epoch": 15.344827586206897, "grad_norm": 2.4437358379364014, "learning_rate": 1.840137931034483e-05, "loss": 0.4667, "step": 4450 }, { "epoch": 15.348275862068965, "grad_norm": 0.9353926181793213, "learning_rate": 1.840551724137931e-05, "loss": 0.3787, "step": 4451 }, { "epoch": 15.351724137931035, "grad_norm": 0.7478002905845642, "learning_rate": 1.840965517241379e-05, "loss": 0.3542, "step": 4452 }, { "epoch": 15.355172413793104, "grad_norm": 0.6640517711639404, "learning_rate": 1.8413793103448277e-05, "loss": 0.357, "step": 4453 }, { "epoch": 15.358620689655172, "grad_norm": 0.7497448921203613, "learning_rate": 1.841793103448276e-05, "loss": 0.2778, "step": 4454 }, { "epoch": 15.362068965517242, "grad_norm": 0.846449077129364, "learning_rate": 1.842206896551724e-05, "loss": 0.287, "step": 4455 }, { "epoch": 15.36551724137931, "grad_norm": 1.08950936794281, "learning_rate": 1.8426206896551723e-05, "loss": 0.3347, "step": 4456 }, { "epoch": 15.36896551724138, "grad_norm": 0.9459695219993591, "learning_rate": 1.8430344827586208e-05, "loss": 0.2857, "step": 4457 }, { "epoch": 15.372413793103448, "grad_norm": 0.8478885889053345, "learning_rate": 1.843448275862069e-05, "loss": 0.2913, "step": 4458 }, { "epoch": 15.375862068965517, "grad_norm": 0.9509575963020325, "learning_rate": 1.8438620689655172e-05, "loss": 0.2868, "step": 4459 }, { "epoch": 15.379310344827585, "grad_norm": 0.8994165658950806, "learning_rate": 1.8442758620689654e-05, "loss": 0.3033, "step": 4460 }, { "epoch": 15.382758620689655, "grad_norm": 0.9572462439537048, "learning_rate": 1.844689655172414e-05, "loss": 0.2851, "step": 4461 }, { "epoch": 15.386206896551725, "grad_norm": 0.8522143959999084, "learning_rate": 1.8451034482758622e-05, "loss": 0.3139, "step": 4462 }, { "epoch": 15.389655172413793, "grad_norm": 1.0186020135879517, "learning_rate": 1.8455172413793107e-05, "loss": 0.308, "step": 4463 }, { "epoch": 15.393103448275863, "grad_norm": 1.1569647789001465, "learning_rate": 1.8459310344827586e-05, "loss": 0.3056, "step": 4464 }, { "epoch": 15.39655172413793, "grad_norm": 0.9896965622901917, "learning_rate": 1.8463448275862068e-05, "loss": 0.3191, "step": 4465 }, { "epoch": 15.4, "grad_norm": 0.6994752883911133, "learning_rate": 1.8467586206896553e-05, "loss": 0.2501, "step": 4466 }, { "epoch": 15.403448275862068, "grad_norm": 0.9602481126785278, "learning_rate": 1.8471724137931035e-05, "loss": 0.3094, "step": 4467 }, { "epoch": 15.406896551724138, "grad_norm": 1.7902858257293701, "learning_rate": 1.8475862068965517e-05, "loss": 0.293, "step": 4468 }, { "epoch": 15.410344827586206, "grad_norm": 0.8995877504348755, "learning_rate": 1.848e-05, "loss": 0.3259, "step": 4469 }, { "epoch": 15.413793103448276, "grad_norm": 1.0192360877990723, "learning_rate": 1.8484137931034485e-05, "loss": 0.2935, "step": 4470 }, { "epoch": 15.417241379310346, "grad_norm": 1.1433569192886353, "learning_rate": 1.8488275862068967e-05, "loss": 0.3218, "step": 4471 }, { "epoch": 15.420689655172414, "grad_norm": 1.5978291034698486, "learning_rate": 1.849241379310345e-05, "loss": 0.3079, "step": 4472 }, { "epoch": 15.424137931034483, "grad_norm": 1.7655223608016968, "learning_rate": 1.849655172413793e-05, "loss": 0.3185, "step": 4473 }, { "epoch": 15.427586206896551, "grad_norm": 1.3826006650924683, "learning_rate": 1.8500689655172416e-05, "loss": 0.3447, "step": 4474 }, { "epoch": 15.431034482758621, "grad_norm": 2.672194004058838, "learning_rate": 1.8504827586206898e-05, "loss": 0.4499, "step": 4475 }, { "epoch": 15.434482758620689, "grad_norm": 0.8347530961036682, "learning_rate": 1.850896551724138e-05, "loss": 0.369, "step": 4476 }, { "epoch": 15.437931034482759, "grad_norm": 1.2264443635940552, "learning_rate": 1.8513103448275862e-05, "loss": 0.3172, "step": 4477 }, { "epoch": 15.441379310344828, "grad_norm": 0.5985655784606934, "learning_rate": 1.8517241379310344e-05, "loss": 0.304, "step": 4478 }, { "epoch": 15.444827586206896, "grad_norm": 0.860369086265564, "learning_rate": 1.852137931034483e-05, "loss": 0.3065, "step": 4479 }, { "epoch": 15.448275862068966, "grad_norm": 1.1260627508163452, "learning_rate": 1.8525517241379308e-05, "loss": 0.3384, "step": 4480 }, { "epoch": 15.451724137931034, "grad_norm": 1.3443015813827515, "learning_rate": 1.8529655172413794e-05, "loss": 0.2791, "step": 4481 }, { "epoch": 15.455172413793104, "grad_norm": 0.7232553958892822, "learning_rate": 1.8533793103448276e-05, "loss": 0.3007, "step": 4482 }, { "epoch": 15.458620689655172, "grad_norm": 0.9685875773429871, "learning_rate": 1.853793103448276e-05, "loss": 0.2896, "step": 4483 }, { "epoch": 15.462068965517242, "grad_norm": 0.754004180431366, "learning_rate": 1.854206896551724e-05, "loss": 0.254, "step": 4484 }, { "epoch": 15.46551724137931, "grad_norm": 1.271815299987793, "learning_rate": 1.8546206896551725e-05, "loss": 0.2881, "step": 4485 }, { "epoch": 15.46896551724138, "grad_norm": 0.7306249737739563, "learning_rate": 1.8550344827586207e-05, "loss": 0.2932, "step": 4486 }, { "epoch": 15.472413793103449, "grad_norm": 0.9264547228813171, "learning_rate": 1.8554482758620693e-05, "loss": 0.3577, "step": 4487 }, { "epoch": 15.475862068965517, "grad_norm": 1.1365898847579956, "learning_rate": 1.855862068965517e-05, "loss": 0.287, "step": 4488 }, { "epoch": 15.479310344827587, "grad_norm": 0.6841462254524231, "learning_rate": 1.8562758620689657e-05, "loss": 0.3099, "step": 4489 }, { "epoch": 15.482758620689655, "grad_norm": 0.9047029614448547, "learning_rate": 1.856689655172414e-05, "loss": 0.2995, "step": 4490 }, { "epoch": 15.486206896551725, "grad_norm": 1.1557754278182983, "learning_rate": 1.857103448275862e-05, "loss": 0.3117, "step": 4491 }, { "epoch": 15.489655172413793, "grad_norm": 0.8856734037399292, "learning_rate": 1.8575172413793103e-05, "loss": 0.2701, "step": 4492 }, { "epoch": 15.493103448275862, "grad_norm": 1.5434385538101196, "learning_rate": 1.8579310344827585e-05, "loss": 0.2617, "step": 4493 }, { "epoch": 15.49655172413793, "grad_norm": 1.4500664472579956, "learning_rate": 1.858344827586207e-05, "loss": 0.3067, "step": 4494 }, { "epoch": 15.5, "grad_norm": 0.9046564698219299, "learning_rate": 1.8587586206896552e-05, "loss": 0.2742, "step": 4495 }, { "epoch": 15.50344827586207, "grad_norm": 1.0646146535873413, "learning_rate": 1.8591724137931037e-05, "loss": 0.2706, "step": 4496 }, { "epoch": 15.506896551724138, "grad_norm": 1.2454118728637695, "learning_rate": 1.8595862068965516e-05, "loss": 0.281, "step": 4497 }, { "epoch": 15.510344827586207, "grad_norm": 1.2455134391784668, "learning_rate": 1.86e-05, "loss": 0.3396, "step": 4498 }, { "epoch": 15.513793103448275, "grad_norm": 1.5173512697219849, "learning_rate": 1.8604137931034484e-05, "loss": 0.3361, "step": 4499 }, { "epoch": 15.517241379310345, "grad_norm": 1.3557095527648926, "learning_rate": 1.860827586206897e-05, "loss": 0.4613, "step": 4500 }, { "epoch": 15.520689655172413, "grad_norm": 0.8800420165061951, "learning_rate": 1.8612413793103448e-05, "loss": 0.3509, "step": 4501 }, { "epoch": 15.524137931034483, "grad_norm": 0.7181717753410339, "learning_rate": 1.8616551724137933e-05, "loss": 0.2893, "step": 4502 }, { "epoch": 15.527586206896551, "grad_norm": 0.7644246816635132, "learning_rate": 1.8620689655172415e-05, "loss": 0.3403, "step": 4503 }, { "epoch": 15.53103448275862, "grad_norm": 0.6905649304389954, "learning_rate": 1.8624827586206897e-05, "loss": 0.3282, "step": 4504 }, { "epoch": 15.53448275862069, "grad_norm": 0.7313365340232849, "learning_rate": 1.862896551724138e-05, "loss": 0.3158, "step": 4505 }, { "epoch": 15.537931034482758, "grad_norm": 1.0300419330596924, "learning_rate": 1.863310344827586e-05, "loss": 0.2953, "step": 4506 }, { "epoch": 15.541379310344828, "grad_norm": 0.8431609869003296, "learning_rate": 1.8637241379310346e-05, "loss": 0.3129, "step": 4507 }, { "epoch": 15.544827586206896, "grad_norm": 0.8097198605537415, "learning_rate": 1.864137931034483e-05, "loss": 0.3184, "step": 4508 }, { "epoch": 15.548275862068966, "grad_norm": 0.8090161681175232, "learning_rate": 1.864551724137931e-05, "loss": 0.2954, "step": 4509 }, { "epoch": 15.551724137931034, "grad_norm": 1.3679696321487427, "learning_rate": 1.8649655172413792e-05, "loss": 0.2869, "step": 4510 }, { "epoch": 15.555172413793104, "grad_norm": 1.131880521774292, "learning_rate": 1.8653793103448278e-05, "loss": 0.2943, "step": 4511 }, { "epoch": 15.558620689655172, "grad_norm": 1.1581398248672485, "learning_rate": 1.865793103448276e-05, "loss": 0.2747, "step": 4512 }, { "epoch": 15.562068965517241, "grad_norm": 0.9962515830993652, "learning_rate": 1.8662068965517242e-05, "loss": 0.3194, "step": 4513 }, { "epoch": 15.565517241379311, "grad_norm": 0.8879286050796509, "learning_rate": 1.8666206896551724e-05, "loss": 0.3038, "step": 4514 }, { "epoch": 15.568965517241379, "grad_norm": 1.0708075761795044, "learning_rate": 1.8670344827586206e-05, "loss": 0.3419, "step": 4515 }, { "epoch": 15.572413793103449, "grad_norm": 0.8800972700119019, "learning_rate": 1.867448275862069e-05, "loss": 0.2928, "step": 4516 }, { "epoch": 15.575862068965517, "grad_norm": 1.0576545000076294, "learning_rate": 1.867862068965517e-05, "loss": 0.3069, "step": 4517 }, { "epoch": 15.579310344827586, "grad_norm": 1.0124459266662598, "learning_rate": 1.8682758620689655e-05, "loss": 0.2911, "step": 4518 }, { "epoch": 15.582758620689654, "grad_norm": 0.885917067527771, "learning_rate": 1.8686896551724137e-05, "loss": 0.2623, "step": 4519 }, { "epoch": 15.586206896551724, "grad_norm": 1.243080973625183, "learning_rate": 1.8691034482758623e-05, "loss": 0.3318, "step": 4520 }, { "epoch": 15.589655172413792, "grad_norm": 1.1896815299987793, "learning_rate": 1.86951724137931e-05, "loss": 0.3175, "step": 4521 }, { "epoch": 15.593103448275862, "grad_norm": 1.749337077140808, "learning_rate": 1.8699310344827587e-05, "loss": 0.3005, "step": 4522 }, { "epoch": 15.596551724137932, "grad_norm": 1.2007317543029785, "learning_rate": 1.870344827586207e-05, "loss": 0.3109, "step": 4523 }, { "epoch": 15.6, "grad_norm": 1.7305999994277954, "learning_rate": 1.8707586206896554e-05, "loss": 0.335, "step": 4524 }, { "epoch": 15.60344827586207, "grad_norm": 2.145085334777832, "learning_rate": 1.8711724137931033e-05, "loss": 0.4097, "step": 4525 }, { "epoch": 15.606896551724137, "grad_norm": 0.7193364500999451, "learning_rate": 1.871586206896552e-05, "loss": 0.3319, "step": 4526 }, { "epoch": 15.610344827586207, "grad_norm": 0.7302389144897461, "learning_rate": 1.872e-05, "loss": 0.318, "step": 4527 }, { "epoch": 15.613793103448275, "grad_norm": 0.8355387449264526, "learning_rate": 1.8724137931034482e-05, "loss": 0.3066, "step": 4528 }, { "epoch": 15.617241379310345, "grad_norm": 0.7054998278617859, "learning_rate": 1.8728275862068968e-05, "loss": 0.2955, "step": 4529 }, { "epoch": 15.620689655172415, "grad_norm": 0.7394112348556519, "learning_rate": 1.8732413793103446e-05, "loss": 0.2882, "step": 4530 }, { "epoch": 15.624137931034483, "grad_norm": 0.8148946762084961, "learning_rate": 1.8736551724137932e-05, "loss": 0.2991, "step": 4531 }, { "epoch": 15.627586206896552, "grad_norm": 0.8876234889030457, "learning_rate": 1.8740689655172414e-05, "loss": 0.2784, "step": 4532 }, { "epoch": 15.63103448275862, "grad_norm": 1.1029164791107178, "learning_rate": 1.87448275862069e-05, "loss": 0.3242, "step": 4533 }, { "epoch": 15.63448275862069, "grad_norm": 1.3412823677062988, "learning_rate": 1.8748965517241378e-05, "loss": 0.3068, "step": 4534 }, { "epoch": 15.637931034482758, "grad_norm": 0.9797614812850952, "learning_rate": 1.8753103448275863e-05, "loss": 0.3033, "step": 4535 }, { "epoch": 15.641379310344828, "grad_norm": 0.7280862331390381, "learning_rate": 1.8757241379310345e-05, "loss": 0.29, "step": 4536 }, { "epoch": 15.644827586206896, "grad_norm": 0.7323205471038818, "learning_rate": 1.876137931034483e-05, "loss": 0.3516, "step": 4537 }, { "epoch": 15.648275862068965, "grad_norm": 0.788333535194397, "learning_rate": 1.876551724137931e-05, "loss": 0.2883, "step": 4538 }, { "epoch": 15.651724137931035, "grad_norm": 0.7224892973899841, "learning_rate": 1.8769655172413795e-05, "loss": 0.2724, "step": 4539 }, { "epoch": 15.655172413793103, "grad_norm": 0.6711258292198181, "learning_rate": 1.8773793103448277e-05, "loss": 0.2819, "step": 4540 }, { "epoch": 15.658620689655173, "grad_norm": 0.8194411993026733, "learning_rate": 1.877793103448276e-05, "loss": 0.2908, "step": 4541 }, { "epoch": 15.662068965517241, "grad_norm": 0.8638328909873962, "learning_rate": 1.878206896551724e-05, "loss": 0.3109, "step": 4542 }, { "epoch": 15.66551724137931, "grad_norm": 1.590585470199585, "learning_rate": 1.8786206896551723e-05, "loss": 0.2891, "step": 4543 }, { "epoch": 15.668965517241379, "grad_norm": 1.2261956930160522, "learning_rate": 1.8790344827586208e-05, "loss": 0.3165, "step": 4544 }, { "epoch": 15.672413793103448, "grad_norm": 1.2856597900390625, "learning_rate": 1.879448275862069e-05, "loss": 0.3281, "step": 4545 }, { "epoch": 15.675862068965518, "grad_norm": 1.5396567583084106, "learning_rate": 1.8798620689655172e-05, "loss": 0.3356, "step": 4546 }, { "epoch": 15.679310344827586, "grad_norm": 1.2912712097167969, "learning_rate": 1.8802758620689654e-05, "loss": 0.319, "step": 4547 }, { "epoch": 15.682758620689656, "grad_norm": 1.0278903245925903, "learning_rate": 1.880689655172414e-05, "loss": 0.305, "step": 4548 }, { "epoch": 15.686206896551724, "grad_norm": 0.9749672412872314, "learning_rate": 1.881103448275862e-05, "loss": 0.316, "step": 4549 }, { "epoch": 15.689655172413794, "grad_norm": 2.5910379886627197, "learning_rate": 1.8815172413793104e-05, "loss": 0.4163, "step": 4550 }, { "epoch": 15.693103448275862, "grad_norm": 0.9028560519218445, "learning_rate": 1.8819310344827586e-05, "loss": 0.3471, "step": 4551 }, { "epoch": 15.696551724137931, "grad_norm": 0.641305685043335, "learning_rate": 1.882344827586207e-05, "loss": 0.3254, "step": 4552 }, { "epoch": 15.7, "grad_norm": 0.6332208514213562, "learning_rate": 1.8827586206896553e-05, "loss": 0.2794, "step": 4553 }, { "epoch": 15.703448275862069, "grad_norm": 0.6608474254608154, "learning_rate": 1.8831724137931032e-05, "loss": 0.33, "step": 4554 }, { "epoch": 15.706896551724139, "grad_norm": 1.1405597925186157, "learning_rate": 1.8835862068965517e-05, "loss": 0.3119, "step": 4555 }, { "epoch": 15.710344827586207, "grad_norm": 0.6742851734161377, "learning_rate": 1.884e-05, "loss": 0.3256, "step": 4556 }, { "epoch": 15.713793103448277, "grad_norm": 1.3382402658462524, "learning_rate": 1.8844137931034485e-05, "loss": 0.3055, "step": 4557 }, { "epoch": 15.717241379310344, "grad_norm": 0.9272301197052002, "learning_rate": 1.8848275862068963e-05, "loss": 0.3289, "step": 4558 }, { "epoch": 15.720689655172414, "grad_norm": 0.60773766040802, "learning_rate": 1.885241379310345e-05, "loss": 0.2808, "step": 4559 }, { "epoch": 15.724137931034482, "grad_norm": 0.883780837059021, "learning_rate": 1.885655172413793e-05, "loss": 0.2995, "step": 4560 }, { "epoch": 15.727586206896552, "grad_norm": 0.8161359429359436, "learning_rate": 1.8860689655172416e-05, "loss": 0.3086, "step": 4561 }, { "epoch": 15.73103448275862, "grad_norm": 1.0800174474716187, "learning_rate": 1.8864827586206898e-05, "loss": 0.3121, "step": 4562 }, { "epoch": 15.73448275862069, "grad_norm": 2.5334811210632324, "learning_rate": 1.886896551724138e-05, "loss": 0.2611, "step": 4563 }, { "epoch": 15.73793103448276, "grad_norm": 0.7639361619949341, "learning_rate": 1.8873103448275862e-05, "loss": 0.2943, "step": 4564 }, { "epoch": 15.741379310344827, "grad_norm": 3.175109386444092, "learning_rate": 1.8877241379310348e-05, "loss": 0.2783, "step": 4565 }, { "epoch": 15.744827586206897, "grad_norm": 0.912328839302063, "learning_rate": 1.888137931034483e-05, "loss": 0.2795, "step": 4566 }, { "epoch": 15.748275862068965, "grad_norm": 1.0728737115859985, "learning_rate": 1.8885517241379308e-05, "loss": 0.2996, "step": 4567 }, { "epoch": 15.751724137931035, "grad_norm": 0.8987631797790527, "learning_rate": 1.8889655172413794e-05, "loss": 0.3196, "step": 4568 }, { "epoch": 15.755172413793103, "grad_norm": 0.9613704085350037, "learning_rate": 1.8893793103448276e-05, "loss": 0.3002, "step": 4569 }, { "epoch": 15.758620689655173, "grad_norm": 0.9717147946357727, "learning_rate": 1.889793103448276e-05, "loss": 0.2979, "step": 4570 }, { "epoch": 15.76206896551724, "grad_norm": 0.702438473701477, "learning_rate": 1.890206896551724e-05, "loss": 0.3207, "step": 4571 }, { "epoch": 15.76551724137931, "grad_norm": 1.9644414186477661, "learning_rate": 1.8906206896551725e-05, "loss": 0.2978, "step": 4572 }, { "epoch": 15.76896551724138, "grad_norm": 1.116557002067566, "learning_rate": 1.8910344827586207e-05, "loss": 0.3025, "step": 4573 }, { "epoch": 15.772413793103448, "grad_norm": 1.5173925161361694, "learning_rate": 1.8914482758620692e-05, "loss": 0.3265, "step": 4574 }, { "epoch": 15.775862068965518, "grad_norm": 2.2412238121032715, "learning_rate": 1.891862068965517e-05, "loss": 0.4277, "step": 4575 }, { "epoch": 15.779310344827586, "grad_norm": 0.8061805367469788, "learning_rate": 1.8922758620689656e-05, "loss": 0.3165, "step": 4576 }, { "epoch": 15.782758620689656, "grad_norm": 0.6977750658988953, "learning_rate": 1.892689655172414e-05, "loss": 0.3228, "step": 4577 }, { "epoch": 15.786206896551723, "grad_norm": 0.7727911472320557, "learning_rate": 1.8931034482758624e-05, "loss": 0.2927, "step": 4578 }, { "epoch": 15.789655172413793, "grad_norm": 0.5675490498542786, "learning_rate": 1.8935172413793103e-05, "loss": 0.3083, "step": 4579 }, { "epoch": 15.793103448275861, "grad_norm": 1.3424813747406006, "learning_rate": 1.8939310344827585e-05, "loss": 0.3179, "step": 4580 }, { "epoch": 15.796551724137931, "grad_norm": 0.6696300506591797, "learning_rate": 1.894344827586207e-05, "loss": 0.2728, "step": 4581 }, { "epoch": 15.8, "grad_norm": 0.777932345867157, "learning_rate": 1.8947586206896552e-05, "loss": 0.3559, "step": 4582 }, { "epoch": 15.803448275862069, "grad_norm": 1.4808247089385986, "learning_rate": 1.8951724137931034e-05, "loss": 0.3122, "step": 4583 }, { "epoch": 15.806896551724138, "grad_norm": 0.8751469254493713, "learning_rate": 1.8955862068965516e-05, "loss": 0.2901, "step": 4584 }, { "epoch": 15.810344827586206, "grad_norm": 0.741432785987854, "learning_rate": 1.896e-05, "loss": 0.2631, "step": 4585 }, { "epoch": 15.813793103448276, "grad_norm": 1.2556778192520142, "learning_rate": 1.8964137931034483e-05, "loss": 0.2779, "step": 4586 }, { "epoch": 15.817241379310344, "grad_norm": 0.867336630821228, "learning_rate": 1.8968275862068965e-05, "loss": 0.2599, "step": 4587 }, { "epoch": 15.820689655172414, "grad_norm": 0.8720547556877136, "learning_rate": 1.8972413793103447e-05, "loss": 0.2947, "step": 4588 }, { "epoch": 15.824137931034482, "grad_norm": 1.508867859840393, "learning_rate": 1.8976551724137933e-05, "loss": 0.2805, "step": 4589 }, { "epoch": 15.827586206896552, "grad_norm": 1.4079532623291016, "learning_rate": 1.8980689655172415e-05, "loss": 0.3047, "step": 4590 }, { "epoch": 15.831034482758621, "grad_norm": 1.1332627534866333, "learning_rate": 1.8984827586206897e-05, "loss": 0.3038, "step": 4591 }, { "epoch": 15.83448275862069, "grad_norm": 2.067784547805786, "learning_rate": 1.898896551724138e-05, "loss": 0.2876, "step": 4592 }, { "epoch": 15.837931034482759, "grad_norm": 0.8872734308242798, "learning_rate": 1.899310344827586e-05, "loss": 0.2832, "step": 4593 }, { "epoch": 15.841379310344827, "grad_norm": 0.9683377742767334, "learning_rate": 1.8997241379310346e-05, "loss": 0.2601, "step": 4594 }, { "epoch": 15.844827586206897, "grad_norm": 1.0443975925445557, "learning_rate": 1.900137931034483e-05, "loss": 0.2576, "step": 4595 }, { "epoch": 15.848275862068965, "grad_norm": 1.2889070510864258, "learning_rate": 1.900551724137931e-05, "loss": 0.3172, "step": 4596 }, { "epoch": 15.851724137931035, "grad_norm": 0.9903459548950195, "learning_rate": 1.9009655172413792e-05, "loss": 0.3191, "step": 4597 }, { "epoch": 15.855172413793104, "grad_norm": 1.3427693843841553, "learning_rate": 1.9013793103448278e-05, "loss": 0.3098, "step": 4598 }, { "epoch": 15.858620689655172, "grad_norm": 1.1047841310501099, "learning_rate": 1.901793103448276e-05, "loss": 0.3144, "step": 4599 }, { "epoch": 15.862068965517242, "grad_norm": 2.1071276664733887, "learning_rate": 1.9022068965517242e-05, "loss": 0.4785, "step": 4600 }, { "epoch": 15.86551724137931, "grad_norm": 0.9369087219238281, "learning_rate": 1.9026206896551724e-05, "loss": 0.3217, "step": 4601 }, { "epoch": 15.86896551724138, "grad_norm": 1.092919111251831, "learning_rate": 1.903034482758621e-05, "loss": 0.3079, "step": 4602 }, { "epoch": 15.872413793103448, "grad_norm": 0.9812614321708679, "learning_rate": 1.903448275862069e-05, "loss": 0.3335, "step": 4603 }, { "epoch": 15.875862068965517, "grad_norm": 0.8782328963279724, "learning_rate": 1.9038620689655173e-05, "loss": 0.2971, "step": 4604 }, { "epoch": 15.879310344827585, "grad_norm": 0.7524398565292358, "learning_rate": 1.9042758620689655e-05, "loss": 0.3083, "step": 4605 }, { "epoch": 15.882758620689655, "grad_norm": 0.8462172150611877, "learning_rate": 1.9046896551724137e-05, "loss": 0.3166, "step": 4606 }, { "epoch": 15.886206896551725, "grad_norm": 0.9990975856781006, "learning_rate": 1.9051034482758623e-05, "loss": 0.3618, "step": 4607 }, { "epoch": 15.889655172413793, "grad_norm": 0.7566094994544983, "learning_rate": 1.90551724137931e-05, "loss": 0.2695, "step": 4608 }, { "epoch": 15.893103448275863, "grad_norm": 0.909000813961029, "learning_rate": 1.9059310344827587e-05, "loss": 0.2732, "step": 4609 }, { "epoch": 15.89655172413793, "grad_norm": 0.7561302781105042, "learning_rate": 1.906344827586207e-05, "loss": 0.255, "step": 4610 }, { "epoch": 15.9, "grad_norm": 0.9377039074897766, "learning_rate": 1.9067586206896554e-05, "loss": 0.3116, "step": 4611 }, { "epoch": 15.903448275862068, "grad_norm": 0.8888034224510193, "learning_rate": 1.9071724137931033e-05, "loss": 0.2855, "step": 4612 }, { "epoch": 15.906896551724138, "grad_norm": 0.7053468823432922, "learning_rate": 1.9075862068965518e-05, "loss": 0.2846, "step": 4613 }, { "epoch": 15.910344827586208, "grad_norm": 0.8430263996124268, "learning_rate": 1.908e-05, "loss": 0.2802, "step": 4614 }, { "epoch": 15.913793103448276, "grad_norm": 0.9184257388114929, "learning_rate": 1.9084137931034486e-05, "loss": 0.3343, "step": 4615 }, { "epoch": 15.917241379310346, "grad_norm": 0.7882572412490845, "learning_rate": 1.9088275862068964e-05, "loss": 0.2863, "step": 4616 }, { "epoch": 15.920689655172414, "grad_norm": 0.7694082260131836, "learning_rate": 1.909241379310345e-05, "loss": 0.3124, "step": 4617 }, { "epoch": 15.924137931034483, "grad_norm": 1.103492259979248, "learning_rate": 1.9096551724137932e-05, "loss": 0.3011, "step": 4618 }, { "epoch": 15.927586206896551, "grad_norm": 0.928768515586853, "learning_rate": 1.9100689655172414e-05, "loss": 0.2929, "step": 4619 }, { "epoch": 15.931034482758621, "grad_norm": 1.0636709928512573, "learning_rate": 1.9104827586206896e-05, "loss": 0.2658, "step": 4620 }, { "epoch": 15.934482758620689, "grad_norm": 1.3083328008651733, "learning_rate": 1.9108965517241378e-05, "loss": 0.2884, "step": 4621 }, { "epoch": 15.937931034482759, "grad_norm": 1.2800313234329224, "learning_rate": 1.9113103448275863e-05, "loss": 0.3008, "step": 4622 }, { "epoch": 15.941379310344828, "grad_norm": 1.3219512701034546, "learning_rate": 1.9117241379310345e-05, "loss": 0.2906, "step": 4623 }, { "epoch": 15.944827586206896, "grad_norm": 1.5300076007843018, "learning_rate": 1.9121379310344827e-05, "loss": 0.3485, "step": 4624 }, { "epoch": 15.948275862068966, "grad_norm": 1.4369112253189087, "learning_rate": 1.912551724137931e-05, "loss": 0.4037, "step": 4625 }, { "epoch": 15.951724137931034, "grad_norm": 0.6383153200149536, "learning_rate": 1.9129655172413795e-05, "loss": 0.3062, "step": 4626 }, { "epoch": 15.955172413793104, "grad_norm": 0.8921545743942261, "learning_rate": 1.9133793103448277e-05, "loss": 0.298, "step": 4627 }, { "epoch": 15.958620689655172, "grad_norm": 0.8387024998664856, "learning_rate": 1.9137931034482762e-05, "loss": 0.2996, "step": 4628 }, { "epoch": 15.962068965517242, "grad_norm": 0.6242976784706116, "learning_rate": 1.914206896551724e-05, "loss": 0.2843, "step": 4629 }, { "epoch": 15.96551724137931, "grad_norm": 0.7009451389312744, "learning_rate": 1.9146206896551726e-05, "loss": 0.3309, "step": 4630 }, { "epoch": 15.96896551724138, "grad_norm": 0.6893792748451233, "learning_rate": 1.9150344827586208e-05, "loss": 0.293, "step": 4631 }, { "epoch": 15.972413793103449, "grad_norm": 0.69040447473526, "learning_rate": 1.915448275862069e-05, "loss": 0.265, "step": 4632 }, { "epoch": 15.975862068965517, "grad_norm": 1.0033214092254639, "learning_rate": 1.9158620689655172e-05, "loss": 0.2878, "step": 4633 }, { "epoch": 15.979310344827587, "grad_norm": 0.8012151122093201, "learning_rate": 1.9162758620689654e-05, "loss": 0.288, "step": 4634 }, { "epoch": 15.982758620689655, "grad_norm": 0.8612509369850159, "learning_rate": 1.916689655172414e-05, "loss": 0.2866, "step": 4635 }, { "epoch": 15.986206896551725, "grad_norm": 0.666917085647583, "learning_rate": 1.917103448275862e-05, "loss": 0.2933, "step": 4636 }, { "epoch": 15.989655172413793, "grad_norm": 1.0895158052444458, "learning_rate": 1.9175172413793104e-05, "loss": 0.3254, "step": 4637 }, { "epoch": 15.993103448275862, "grad_norm": 1.5805201530456543, "learning_rate": 1.9179310344827586e-05, "loss": 0.273, "step": 4638 }, { "epoch": 15.99655172413793, "grad_norm": 1.0823298692703247, "learning_rate": 1.918344827586207e-05, "loss": 0.3189, "step": 4639 }, { "epoch": 16.0, "grad_norm": 1.1971900463104248, "learning_rate": 1.9187586206896553e-05, "loss": 0.3966, "step": 4640 }, { "epoch": 16.00344827586207, "grad_norm": 0.8276216387748718, "learning_rate": 1.9191724137931035e-05, "loss": 0.3339, "step": 4641 }, { "epoch": 16.00689655172414, "grad_norm": 0.781886875629425, "learning_rate": 1.9195862068965517e-05, "loss": 0.3153, "step": 4642 }, { "epoch": 16.010344827586206, "grad_norm": 0.8505652546882629, "learning_rate": 1.9200000000000003e-05, "loss": 0.2986, "step": 4643 }, { "epoch": 16.013793103448275, "grad_norm": 0.611860454082489, "learning_rate": 1.9204137931034485e-05, "loss": 0.2901, "step": 4644 }, { "epoch": 16.017241379310345, "grad_norm": 0.8117966055870056, "learning_rate": 1.9208275862068963e-05, "loss": 0.3138, "step": 4645 }, { "epoch": 16.020689655172415, "grad_norm": 0.7904847860336304, "learning_rate": 1.921241379310345e-05, "loss": 0.275, "step": 4646 }, { "epoch": 16.02413793103448, "grad_norm": 0.8812517523765564, "learning_rate": 1.921655172413793e-05, "loss": 0.3243, "step": 4647 }, { "epoch": 16.02758620689655, "grad_norm": 0.6421033143997192, "learning_rate": 1.9220689655172416e-05, "loss": 0.2711, "step": 4648 }, { "epoch": 16.03103448275862, "grad_norm": 0.6232922673225403, "learning_rate": 1.9224827586206895e-05, "loss": 0.3021, "step": 4649 }, { "epoch": 16.03448275862069, "grad_norm": 0.6339751482009888, "learning_rate": 1.922896551724138e-05, "loss": 0.2866, "step": 4650 }, { "epoch": 16.03793103448276, "grad_norm": 0.8206283450126648, "learning_rate": 1.9233103448275862e-05, "loss": 0.3229, "step": 4651 }, { "epoch": 16.041379310344826, "grad_norm": 0.6213886737823486, "learning_rate": 1.9237241379310347e-05, "loss": 0.2818, "step": 4652 }, { "epoch": 16.044827586206896, "grad_norm": 0.8471933007240295, "learning_rate": 1.9241379310344826e-05, "loss": 0.2826, "step": 4653 }, { "epoch": 16.048275862068966, "grad_norm": 0.7194118499755859, "learning_rate": 1.924551724137931e-05, "loss": 0.2581, "step": 4654 }, { "epoch": 16.051724137931036, "grad_norm": 0.6887535452842712, "learning_rate": 1.9249655172413794e-05, "loss": 0.303, "step": 4655 }, { "epoch": 16.055172413793102, "grad_norm": 1.0506079196929932, "learning_rate": 1.925379310344828e-05, "loss": 0.2484, "step": 4656 }, { "epoch": 16.05862068965517, "grad_norm": 0.8906229734420776, "learning_rate": 1.9257931034482758e-05, "loss": 0.2838, "step": 4657 }, { "epoch": 16.06206896551724, "grad_norm": 0.9563008546829224, "learning_rate": 1.926206896551724e-05, "loss": 0.2948, "step": 4658 }, { "epoch": 16.06551724137931, "grad_norm": 0.8857884407043457, "learning_rate": 1.9266206896551725e-05, "loss": 0.345, "step": 4659 }, { "epoch": 16.06896551724138, "grad_norm": 1.0820441246032715, "learning_rate": 1.9270344827586207e-05, "loss": 0.2907, "step": 4660 }, { "epoch": 16.072413793103447, "grad_norm": 1.5944888591766357, "learning_rate": 1.9274482758620692e-05, "loss": 0.3032, "step": 4661 }, { "epoch": 16.075862068965517, "grad_norm": 1.2530431747436523, "learning_rate": 1.927862068965517e-05, "loss": 0.2875, "step": 4662 }, { "epoch": 16.079310344827586, "grad_norm": 1.023216724395752, "learning_rate": 1.9282758620689656e-05, "loss": 0.3112, "step": 4663 }, { "epoch": 16.082758620689656, "grad_norm": 0.9283360242843628, "learning_rate": 1.928689655172414e-05, "loss": 0.3103, "step": 4664 }, { "epoch": 16.086206896551722, "grad_norm": 1.7988966703414917, "learning_rate": 1.9291034482758624e-05, "loss": 0.3752, "step": 4665 }, { "epoch": 16.089655172413792, "grad_norm": 0.9216311573982239, "learning_rate": 1.9295172413793102e-05, "loss": 0.3547, "step": 4666 }, { "epoch": 16.093103448275862, "grad_norm": 0.7168927788734436, "learning_rate": 1.9299310344827588e-05, "loss": 0.3055, "step": 4667 }, { "epoch": 16.09655172413793, "grad_norm": 0.6142680048942566, "learning_rate": 1.930344827586207e-05, "loss": 0.2944, "step": 4668 }, { "epoch": 16.1, "grad_norm": 0.7634071707725525, "learning_rate": 1.9307586206896555e-05, "loss": 0.2893, "step": 4669 }, { "epoch": 16.103448275862068, "grad_norm": 0.8944082260131836, "learning_rate": 1.9311724137931034e-05, "loss": 0.3136, "step": 4670 }, { "epoch": 16.106896551724137, "grad_norm": 0.8028529286384583, "learning_rate": 1.9315862068965516e-05, "loss": 0.3175, "step": 4671 }, { "epoch": 16.110344827586207, "grad_norm": 0.6663626432418823, "learning_rate": 1.932e-05, "loss": 0.298, "step": 4672 }, { "epoch": 16.113793103448277, "grad_norm": 0.7894071340560913, "learning_rate": 1.9324137931034483e-05, "loss": 0.2886, "step": 4673 }, { "epoch": 16.117241379310343, "grad_norm": 0.7497895956039429, "learning_rate": 1.9328275862068965e-05, "loss": 0.2681, "step": 4674 }, { "epoch": 16.120689655172413, "grad_norm": 0.8983267545700073, "learning_rate": 1.9332413793103447e-05, "loss": 0.2879, "step": 4675 }, { "epoch": 16.124137931034483, "grad_norm": 0.768577516078949, "learning_rate": 1.9336551724137933e-05, "loss": 0.3069, "step": 4676 }, { "epoch": 16.127586206896552, "grad_norm": 1.406846523284912, "learning_rate": 1.9340689655172415e-05, "loss": 0.3032, "step": 4677 }, { "epoch": 16.131034482758622, "grad_norm": 0.6752268075942993, "learning_rate": 1.9344827586206897e-05, "loss": 0.3004, "step": 4678 }, { "epoch": 16.13448275862069, "grad_norm": 1.1879758834838867, "learning_rate": 1.934896551724138e-05, "loss": 0.2854, "step": 4679 }, { "epoch": 16.137931034482758, "grad_norm": 0.8848440647125244, "learning_rate": 1.9353103448275864e-05, "loss": 0.2811, "step": 4680 }, { "epoch": 16.141379310344828, "grad_norm": 1.670696496963501, "learning_rate": 1.9357241379310346e-05, "loss": 0.2686, "step": 4681 }, { "epoch": 16.144827586206898, "grad_norm": 1.0325735807418823, "learning_rate": 1.936137931034483e-05, "loss": 0.2785, "step": 4682 }, { "epoch": 16.148275862068967, "grad_norm": 1.0368590354919434, "learning_rate": 1.936551724137931e-05, "loss": 0.2729, "step": 4683 }, { "epoch": 16.151724137931033, "grad_norm": 2.2292168140411377, "learning_rate": 1.9369655172413792e-05, "loss": 0.2672, "step": 4684 }, { "epoch": 16.155172413793103, "grad_norm": 0.952910304069519, "learning_rate": 1.9373793103448278e-05, "loss": 0.3074, "step": 4685 }, { "epoch": 16.158620689655173, "grad_norm": 2.373979330062866, "learning_rate": 1.9377931034482756e-05, "loss": 0.295, "step": 4686 }, { "epoch": 16.162068965517243, "grad_norm": 1.0205119848251343, "learning_rate": 1.9382068965517242e-05, "loss": 0.2683, "step": 4687 }, { "epoch": 16.16551724137931, "grad_norm": 3.0496866703033447, "learning_rate": 1.9386206896551724e-05, "loss": 0.2938, "step": 4688 }, { "epoch": 16.16896551724138, "grad_norm": 0.9306153059005737, "learning_rate": 1.939034482758621e-05, "loss": 0.2772, "step": 4689 }, { "epoch": 16.17241379310345, "grad_norm": 1.9051198959350586, "learning_rate": 1.9394482758620688e-05, "loss": 0.43, "step": 4690 }, { "epoch": 16.175862068965518, "grad_norm": 0.6662057042121887, "learning_rate": 1.9398620689655173e-05, "loss": 0.3622, "step": 4691 }, { "epoch": 16.179310344827588, "grad_norm": 0.6140010356903076, "learning_rate": 1.9402758620689655e-05, "loss": 0.2812, "step": 4692 }, { "epoch": 16.182758620689654, "grad_norm": 0.6694877743721008, "learning_rate": 1.940689655172414e-05, "loss": 0.3051, "step": 4693 }, { "epoch": 16.186206896551724, "grad_norm": 0.6919894218444824, "learning_rate": 1.9411034482758623e-05, "loss": 0.2957, "step": 4694 }, { "epoch": 16.189655172413794, "grad_norm": 0.8460975885391235, "learning_rate": 1.9415172413793105e-05, "loss": 0.2858, "step": 4695 }, { "epoch": 16.193103448275863, "grad_norm": 0.8830814957618713, "learning_rate": 1.9419310344827587e-05, "loss": 0.2994, "step": 4696 }, { "epoch": 16.19655172413793, "grad_norm": 0.7449194192886353, "learning_rate": 1.942344827586207e-05, "loss": 0.283, "step": 4697 }, { "epoch": 16.2, "grad_norm": 0.8612818717956543, "learning_rate": 1.9427586206896554e-05, "loss": 0.2856, "step": 4698 }, { "epoch": 16.20344827586207, "grad_norm": 1.2145068645477295, "learning_rate": 1.9431724137931033e-05, "loss": 0.2655, "step": 4699 }, { "epoch": 16.20689655172414, "grad_norm": 0.839898407459259, "learning_rate": 1.9435862068965518e-05, "loss": 0.3036, "step": 4700 }, { "epoch": 16.21034482758621, "grad_norm": 1.006513237953186, "learning_rate": 1.944e-05, "loss": 0.2665, "step": 4701 }, { "epoch": 16.213793103448275, "grad_norm": 0.8165764212608337, "learning_rate": 1.9444137931034486e-05, "loss": 0.2631, "step": 4702 }, { "epoch": 16.217241379310344, "grad_norm": 0.9838398694992065, "learning_rate": 1.9448275862068964e-05, "loss": 0.2709, "step": 4703 }, { "epoch": 16.220689655172414, "grad_norm": 0.7291871309280396, "learning_rate": 1.945241379310345e-05, "loss": 0.2763, "step": 4704 }, { "epoch": 16.224137931034484, "grad_norm": 1.148093819618225, "learning_rate": 1.945655172413793e-05, "loss": 0.2913, "step": 4705 }, { "epoch": 16.22758620689655, "grad_norm": 0.7976373434066772, "learning_rate": 1.9460689655172417e-05, "loss": 0.2706, "step": 4706 }, { "epoch": 16.23103448275862, "grad_norm": 1.105948567390442, "learning_rate": 1.9464827586206896e-05, "loss": 0.2936, "step": 4707 }, { "epoch": 16.23448275862069, "grad_norm": 0.6920824646949768, "learning_rate": 1.9468965517241378e-05, "loss": 0.2845, "step": 4708 }, { "epoch": 16.23793103448276, "grad_norm": 0.7491571307182312, "learning_rate": 1.9473103448275863e-05, "loss": 0.2649, "step": 4709 }, { "epoch": 16.24137931034483, "grad_norm": 0.8020318746566772, "learning_rate": 1.9477241379310345e-05, "loss": 0.2819, "step": 4710 }, { "epoch": 16.244827586206895, "grad_norm": 1.3097506761550903, "learning_rate": 1.9481379310344827e-05, "loss": 0.3039, "step": 4711 }, { "epoch": 16.248275862068965, "grad_norm": 0.7958338260650635, "learning_rate": 1.948551724137931e-05, "loss": 0.2998, "step": 4712 }, { "epoch": 16.251724137931035, "grad_norm": 1.221408724784851, "learning_rate": 1.9489655172413795e-05, "loss": 0.3282, "step": 4713 }, { "epoch": 16.255172413793105, "grad_norm": 2.7691338062286377, "learning_rate": 1.9493793103448277e-05, "loss": 0.3336, "step": 4714 }, { "epoch": 16.25862068965517, "grad_norm": 1.7506555318832397, "learning_rate": 1.949793103448276e-05, "loss": 0.4457, "step": 4715 }, { "epoch": 16.26206896551724, "grad_norm": 0.6286019086837769, "learning_rate": 1.950206896551724e-05, "loss": 0.3262, "step": 4716 }, { "epoch": 16.26551724137931, "grad_norm": 0.7621632218360901, "learning_rate": 1.9506206896551726e-05, "loss": 0.3458, "step": 4717 }, { "epoch": 16.26896551724138, "grad_norm": 0.706120491027832, "learning_rate": 1.9510344827586208e-05, "loss": 0.3252, "step": 4718 }, { "epoch": 16.27241379310345, "grad_norm": 0.7331655025482178, "learning_rate": 1.951448275862069e-05, "loss": 0.3319, "step": 4719 }, { "epoch": 16.275862068965516, "grad_norm": 0.6857418417930603, "learning_rate": 1.9518620689655172e-05, "loss": 0.3199, "step": 4720 }, { "epoch": 16.279310344827586, "grad_norm": 0.8042430281639099, "learning_rate": 1.9522758620689654e-05, "loss": 0.2955, "step": 4721 }, { "epoch": 16.282758620689656, "grad_norm": 1.4898655414581299, "learning_rate": 1.952689655172414e-05, "loss": 0.3542, "step": 4722 }, { "epoch": 16.286206896551725, "grad_norm": 0.8255028128623962, "learning_rate": 1.9531034482758618e-05, "loss": 0.3274, "step": 4723 }, { "epoch": 16.28965517241379, "grad_norm": 0.7339754700660706, "learning_rate": 1.9535172413793104e-05, "loss": 0.3047, "step": 4724 }, { "epoch": 16.29310344827586, "grad_norm": 0.7897910475730896, "learning_rate": 1.9539310344827586e-05, "loss": 0.2733, "step": 4725 }, { "epoch": 16.29655172413793, "grad_norm": 0.78590989112854, "learning_rate": 1.954344827586207e-05, "loss": 0.2946, "step": 4726 }, { "epoch": 16.3, "grad_norm": 0.802092969417572, "learning_rate": 1.9547586206896553e-05, "loss": 0.3245, "step": 4727 }, { "epoch": 16.30344827586207, "grad_norm": 0.8769364356994629, "learning_rate": 1.9551724137931035e-05, "loss": 0.2866, "step": 4728 }, { "epoch": 16.306896551724137, "grad_norm": 2.1812336444854736, "learning_rate": 1.9555862068965517e-05, "loss": 0.3024, "step": 4729 }, { "epoch": 16.310344827586206, "grad_norm": 1.0455738306045532, "learning_rate": 1.9560000000000002e-05, "loss": 0.3082, "step": 4730 }, { "epoch": 16.313793103448276, "grad_norm": 0.9997264742851257, "learning_rate": 1.9564137931034484e-05, "loss": 0.2672, "step": 4731 }, { "epoch": 16.317241379310346, "grad_norm": 0.7288722395896912, "learning_rate": 1.9568275862068966e-05, "loss": 0.2853, "step": 4732 }, { "epoch": 16.320689655172412, "grad_norm": 0.9426423907279968, "learning_rate": 1.957241379310345e-05, "loss": 0.2834, "step": 4733 }, { "epoch": 16.324137931034482, "grad_norm": 1.0655198097229004, "learning_rate": 1.957655172413793e-05, "loss": 0.2994, "step": 4734 }, { "epoch": 16.32758620689655, "grad_norm": 1.3401422500610352, "learning_rate": 1.9580689655172416e-05, "loss": 0.2776, "step": 4735 }, { "epoch": 16.33103448275862, "grad_norm": 0.9964897632598877, "learning_rate": 1.9584827586206895e-05, "loss": 0.2933, "step": 4736 }, { "epoch": 16.33448275862069, "grad_norm": 1.2271391153335571, "learning_rate": 1.958896551724138e-05, "loss": 0.2711, "step": 4737 }, { "epoch": 16.337931034482757, "grad_norm": 3.261791229248047, "learning_rate": 1.9593103448275862e-05, "loss": 0.281, "step": 4738 }, { "epoch": 16.341379310344827, "grad_norm": 2.049771785736084, "learning_rate": 1.9597241379310347e-05, "loss": 0.3089, "step": 4739 }, { "epoch": 16.344827586206897, "grad_norm": 2.5642168521881104, "learning_rate": 1.9601379310344826e-05, "loss": 0.3989, "step": 4740 }, { "epoch": 16.348275862068967, "grad_norm": 0.759894609451294, "learning_rate": 1.960551724137931e-05, "loss": 0.3562, "step": 4741 }, { "epoch": 16.351724137931033, "grad_norm": 0.8095716238021851, "learning_rate": 1.9609655172413793e-05, "loss": 0.2987, "step": 4742 }, { "epoch": 16.355172413793102, "grad_norm": 0.6842406392097473, "learning_rate": 1.961379310344828e-05, "loss": 0.3036, "step": 4743 }, { "epoch": 16.358620689655172, "grad_norm": 0.8027846217155457, "learning_rate": 1.9617931034482757e-05, "loss": 0.296, "step": 4744 }, { "epoch": 16.362068965517242, "grad_norm": 0.9689425826072693, "learning_rate": 1.9622068965517243e-05, "loss": 0.2833, "step": 4745 }, { "epoch": 16.36551724137931, "grad_norm": 0.8679623007774353, "learning_rate": 1.9626206896551725e-05, "loss": 0.3066, "step": 4746 }, { "epoch": 16.368965517241378, "grad_norm": 0.6608709096908569, "learning_rate": 1.9630344827586207e-05, "loss": 0.3132, "step": 4747 }, { "epoch": 16.372413793103448, "grad_norm": 0.8068433403968811, "learning_rate": 1.963448275862069e-05, "loss": 0.2849, "step": 4748 }, { "epoch": 16.375862068965517, "grad_norm": 0.8011911511421204, "learning_rate": 1.963862068965517e-05, "loss": 0.2452, "step": 4749 }, { "epoch": 16.379310344827587, "grad_norm": 0.7519246935844421, "learning_rate": 1.9642758620689656e-05, "loss": 0.28, "step": 4750 }, { "epoch": 16.382758620689657, "grad_norm": 1.2719959020614624, "learning_rate": 1.964689655172414e-05, "loss": 0.2805, "step": 4751 }, { "epoch": 16.386206896551723, "grad_norm": 0.7830546498298645, "learning_rate": 1.965103448275862e-05, "loss": 0.2954, "step": 4752 }, { "epoch": 16.389655172413793, "grad_norm": 1.1339285373687744, "learning_rate": 1.9655172413793102e-05, "loss": 0.2627, "step": 4753 }, { "epoch": 16.393103448275863, "grad_norm": 0.9397374987602234, "learning_rate": 1.9659310344827588e-05, "loss": 0.3163, "step": 4754 }, { "epoch": 16.396551724137932, "grad_norm": 1.2038863897323608, "learning_rate": 1.966344827586207e-05, "loss": 0.2881, "step": 4755 }, { "epoch": 16.4, "grad_norm": 1.3347631692886353, "learning_rate": 1.9667586206896552e-05, "loss": 0.3139, "step": 4756 }, { "epoch": 16.40344827586207, "grad_norm": 1.0964772701263428, "learning_rate": 1.9671724137931034e-05, "loss": 0.2745, "step": 4757 }, { "epoch": 16.406896551724138, "grad_norm": 0.9141931533813477, "learning_rate": 1.967586206896552e-05, "loss": 0.3401, "step": 4758 }, { "epoch": 16.410344827586208, "grad_norm": 1.073175311088562, "learning_rate": 1.968e-05, "loss": 0.3112, "step": 4759 }, { "epoch": 16.413793103448278, "grad_norm": 0.8746545910835266, "learning_rate": 1.9684137931034483e-05, "loss": 0.2657, "step": 4760 }, { "epoch": 16.417241379310344, "grad_norm": 1.5030556917190552, "learning_rate": 1.9688275862068965e-05, "loss": 0.3228, "step": 4761 }, { "epoch": 16.420689655172414, "grad_norm": 1.1956372261047363, "learning_rate": 1.9692413793103447e-05, "loss": 0.2768, "step": 4762 }, { "epoch": 16.424137931034483, "grad_norm": 0.8310409784317017, "learning_rate": 1.9696551724137933e-05, "loss": 0.3011, "step": 4763 }, { "epoch": 16.427586206896553, "grad_norm": 1.4834949970245361, "learning_rate": 1.9700689655172415e-05, "loss": 0.2998, "step": 4764 }, { "epoch": 16.43103448275862, "grad_norm": 1.362236738204956, "learning_rate": 1.9704827586206897e-05, "loss": 0.4078, "step": 4765 }, { "epoch": 16.43448275862069, "grad_norm": 0.7281444668769836, "learning_rate": 1.970896551724138e-05, "loss": 0.3448, "step": 4766 }, { "epoch": 16.43793103448276, "grad_norm": 0.9347448945045471, "learning_rate": 1.9713103448275864e-05, "loss": 0.3176, "step": 4767 }, { "epoch": 16.44137931034483, "grad_norm": 0.6938408017158508, "learning_rate": 1.9717241379310346e-05, "loss": 0.3068, "step": 4768 }, { "epoch": 16.444827586206898, "grad_norm": 1.0916898250579834, "learning_rate": 1.9721379310344828e-05, "loss": 0.307, "step": 4769 }, { "epoch": 16.448275862068964, "grad_norm": 0.8816807270050049, "learning_rate": 1.972551724137931e-05, "loss": 0.2866, "step": 4770 }, { "epoch": 16.451724137931034, "grad_norm": 0.7485337257385254, "learning_rate": 1.9729655172413796e-05, "loss": 0.2689, "step": 4771 }, { "epoch": 16.455172413793104, "grad_norm": 0.6657883524894714, "learning_rate": 1.9733793103448278e-05, "loss": 0.2781, "step": 4772 }, { "epoch": 16.458620689655174, "grad_norm": 0.7840313911437988, "learning_rate": 1.9737931034482756e-05, "loss": 0.3109, "step": 4773 }, { "epoch": 16.46206896551724, "grad_norm": 0.7666704654693604, "learning_rate": 1.9742068965517242e-05, "loss": 0.2991, "step": 4774 }, { "epoch": 16.46551724137931, "grad_norm": 0.6959820985794067, "learning_rate": 1.9746206896551724e-05, "loss": 0.2606, "step": 4775 }, { "epoch": 16.46896551724138, "grad_norm": 0.8930947780609131, "learning_rate": 1.975034482758621e-05, "loss": 0.2808, "step": 4776 }, { "epoch": 16.47241379310345, "grad_norm": 0.8080267906188965, "learning_rate": 1.9754482758620688e-05, "loss": 0.2851, "step": 4777 }, { "epoch": 16.47586206896552, "grad_norm": 0.8766615390777588, "learning_rate": 1.9758620689655173e-05, "loss": 0.2686, "step": 4778 }, { "epoch": 16.479310344827585, "grad_norm": 1.0802311897277832, "learning_rate": 1.9762758620689655e-05, "loss": 0.2903, "step": 4779 }, { "epoch": 16.482758620689655, "grad_norm": 0.887744128704071, "learning_rate": 1.976689655172414e-05, "loss": 0.3345, "step": 4780 }, { "epoch": 16.486206896551725, "grad_norm": 0.9713789820671082, "learning_rate": 1.977103448275862e-05, "loss": 0.2872, "step": 4781 }, { "epoch": 16.489655172413794, "grad_norm": 0.7598459720611572, "learning_rate": 1.9775172413793105e-05, "loss": 0.2838, "step": 4782 }, { "epoch": 16.49310344827586, "grad_norm": 0.8121453523635864, "learning_rate": 1.9779310344827587e-05, "loss": 0.2756, "step": 4783 }, { "epoch": 16.49655172413793, "grad_norm": 0.7384442090988159, "learning_rate": 1.9783448275862072e-05, "loss": 0.2762, "step": 4784 }, { "epoch": 16.5, "grad_norm": 0.9686653017997742, "learning_rate": 1.978758620689655e-05, "loss": 0.2754, "step": 4785 }, { "epoch": 16.50344827586207, "grad_norm": 0.9241976141929626, "learning_rate": 1.9791724137931033e-05, "loss": 0.2819, "step": 4786 }, { "epoch": 16.50689655172414, "grad_norm": 0.8401070833206177, "learning_rate": 1.9795862068965518e-05, "loss": 0.2639, "step": 4787 }, { "epoch": 16.510344827586206, "grad_norm": 0.9444813132286072, "learning_rate": 1.98e-05, "loss": 0.2873, "step": 4788 }, { "epoch": 16.513793103448275, "grad_norm": 1.3934190273284912, "learning_rate": 1.9804137931034482e-05, "loss": 0.3441, "step": 4789 }, { "epoch": 16.517241379310345, "grad_norm": 1.397464394569397, "learning_rate": 1.9808275862068964e-05, "loss": 0.356, "step": 4790 }, { "epoch": 16.520689655172415, "grad_norm": 0.9897643327713013, "learning_rate": 1.981241379310345e-05, "loss": 0.3691, "step": 4791 }, { "epoch": 16.52413793103448, "grad_norm": 0.8187459707260132, "learning_rate": 1.981655172413793e-05, "loss": 0.2861, "step": 4792 }, { "epoch": 16.52758620689655, "grad_norm": 0.5996084809303284, "learning_rate": 1.9820689655172417e-05, "loss": 0.3107, "step": 4793 }, { "epoch": 16.53103448275862, "grad_norm": 0.8771461248397827, "learning_rate": 1.9824827586206896e-05, "loss": 0.315, "step": 4794 }, { "epoch": 16.53448275862069, "grad_norm": 0.797335684299469, "learning_rate": 1.982896551724138e-05, "loss": 0.2919, "step": 4795 }, { "epoch": 16.53793103448276, "grad_norm": 0.7847798466682434, "learning_rate": 1.9833103448275863e-05, "loss": 0.2894, "step": 4796 }, { "epoch": 16.541379310344826, "grad_norm": 0.6951883435249329, "learning_rate": 1.983724137931035e-05, "loss": 0.298, "step": 4797 }, { "epoch": 16.544827586206896, "grad_norm": 0.6456438899040222, "learning_rate": 1.9841379310344827e-05, "loss": 0.248, "step": 4798 }, { "epoch": 16.548275862068966, "grad_norm": 0.6901739835739136, "learning_rate": 1.984551724137931e-05, "loss": 0.2884, "step": 4799 }, { "epoch": 16.551724137931036, "grad_norm": 0.9272278547286987, "learning_rate": 1.9849655172413795e-05, "loss": 0.2949, "step": 4800 }, { "epoch": 16.555172413793102, "grad_norm": 0.6530275940895081, "learning_rate": 1.9853793103448277e-05, "loss": 0.3015, "step": 4801 }, { "epoch": 16.55862068965517, "grad_norm": 0.8245791792869568, "learning_rate": 1.985793103448276e-05, "loss": 0.2714, "step": 4802 }, { "epoch": 16.56206896551724, "grad_norm": 1.064794659614563, "learning_rate": 1.986206896551724e-05, "loss": 0.2868, "step": 4803 }, { "epoch": 16.56551724137931, "grad_norm": 1.1424816846847534, "learning_rate": 1.9866206896551726e-05, "loss": 0.2882, "step": 4804 }, { "epoch": 16.56896551724138, "grad_norm": 0.7488410472869873, "learning_rate": 1.9870344827586208e-05, "loss": 0.2952, "step": 4805 }, { "epoch": 16.572413793103447, "grad_norm": 0.7460827231407166, "learning_rate": 1.987448275862069e-05, "loss": 0.2632, "step": 4806 }, { "epoch": 16.575862068965517, "grad_norm": 0.7528135180473328, "learning_rate": 1.9878620689655172e-05, "loss": 0.2441, "step": 4807 }, { "epoch": 16.579310344827586, "grad_norm": 0.8517968654632568, "learning_rate": 1.9882758620689657e-05, "loss": 0.2713, "step": 4808 }, { "epoch": 16.582758620689656, "grad_norm": 1.4074610471725464, "learning_rate": 1.988689655172414e-05, "loss": 0.2719, "step": 4809 }, { "epoch": 16.586206896551722, "grad_norm": 1.4104588031768799, "learning_rate": 1.989103448275862e-05, "loss": 0.3081, "step": 4810 }, { "epoch": 16.589655172413792, "grad_norm": 0.9436816573143005, "learning_rate": 1.9895172413793104e-05, "loss": 0.3003, "step": 4811 }, { "epoch": 16.593103448275862, "grad_norm": 2.1115238666534424, "learning_rate": 1.9899310344827586e-05, "loss": 0.2762, "step": 4812 }, { "epoch": 16.59655172413793, "grad_norm": 1.536932349205017, "learning_rate": 1.990344827586207e-05, "loss": 0.2882, "step": 4813 }, { "epoch": 16.6, "grad_norm": 1.6145310401916504, "learning_rate": 1.990758620689655e-05, "loss": 0.343, "step": 4814 }, { "epoch": 16.603448275862068, "grad_norm": 1.9171558618545532, "learning_rate": 1.9911724137931035e-05, "loss": 0.4174, "step": 4815 }, { "epoch": 16.606896551724137, "grad_norm": 1.2131729125976562, "learning_rate": 1.9915862068965517e-05, "loss": 0.3459, "step": 4816 }, { "epoch": 16.610344827586207, "grad_norm": 1.245334506034851, "learning_rate": 1.9920000000000002e-05, "loss": 0.3058, "step": 4817 }, { "epoch": 16.613793103448277, "grad_norm": 0.6830624341964722, "learning_rate": 1.992413793103448e-05, "loss": 0.2828, "step": 4818 }, { "epoch": 16.617241379310343, "grad_norm": 0.8199236989021301, "learning_rate": 1.9928275862068966e-05, "loss": 0.3089, "step": 4819 }, { "epoch": 16.620689655172413, "grad_norm": 1.80463707447052, "learning_rate": 1.993241379310345e-05, "loss": 0.2834, "step": 4820 }, { "epoch": 16.624137931034483, "grad_norm": 0.6648328900337219, "learning_rate": 1.9936551724137934e-05, "loss": 0.3061, "step": 4821 }, { "epoch": 16.627586206896552, "grad_norm": 1.2964972257614136, "learning_rate": 1.9940689655172412e-05, "loss": 0.283, "step": 4822 }, { "epoch": 16.631034482758622, "grad_norm": 0.8277648091316223, "learning_rate": 1.9944827586206898e-05, "loss": 0.3203, "step": 4823 }, { "epoch": 16.63448275862069, "grad_norm": 0.6179627180099487, "learning_rate": 1.994896551724138e-05, "loss": 0.2746, "step": 4824 }, { "epoch": 16.637931034482758, "grad_norm": 0.9847938418388367, "learning_rate": 1.9953103448275862e-05, "loss": 0.3127, "step": 4825 }, { "epoch": 16.641379310344828, "grad_norm": 0.7410935163497925, "learning_rate": 1.9957241379310347e-05, "loss": 0.2868, "step": 4826 }, { "epoch": 16.644827586206898, "grad_norm": 1.6996082067489624, "learning_rate": 1.9961379310344826e-05, "loss": 0.27, "step": 4827 }, { "epoch": 16.648275862068964, "grad_norm": 0.751474142074585, "learning_rate": 1.996551724137931e-05, "loss": 0.3142, "step": 4828 }, { "epoch": 16.651724137931033, "grad_norm": 1.8570919036865234, "learning_rate": 1.9969655172413793e-05, "loss": 0.2567, "step": 4829 }, { "epoch": 16.655172413793103, "grad_norm": 1.0576977729797363, "learning_rate": 1.997379310344828e-05, "loss": 0.2934, "step": 4830 }, { "epoch": 16.658620689655173, "grad_norm": 0.7666543126106262, "learning_rate": 1.9977931034482757e-05, "loss": 0.2733, "step": 4831 }, { "epoch": 16.662068965517243, "grad_norm": 1.0137913227081299, "learning_rate": 1.9982068965517243e-05, "loss": 0.266, "step": 4832 }, { "epoch": 16.66551724137931, "grad_norm": 5.1964826583862305, "learning_rate": 1.9986206896551725e-05, "loss": 0.278, "step": 4833 }, { "epoch": 16.66896551724138, "grad_norm": 0.876269519329071, "learning_rate": 1.999034482758621e-05, "loss": 0.2868, "step": 4834 }, { "epoch": 16.67241379310345, "grad_norm": 1.1908824443817139, "learning_rate": 1.999448275862069e-05, "loss": 0.3192, "step": 4835 }, { "epoch": 16.675862068965518, "grad_norm": 1.1907267570495605, "learning_rate": 1.9998620689655174e-05, "loss": 0.2776, "step": 4836 }, { "epoch": 16.679310344827588, "grad_norm": 1.6074813604354858, "learning_rate": 2.0002758620689656e-05, "loss": 0.2613, "step": 4837 }, { "epoch": 16.682758620689654, "grad_norm": 1.1900389194488525, "learning_rate": 2.000689655172414e-05, "loss": 0.2895, "step": 4838 }, { "epoch": 16.686206896551724, "grad_norm": 1.432253122329712, "learning_rate": 2.001103448275862e-05, "loss": 0.3135, "step": 4839 }, { "epoch": 16.689655172413794, "grad_norm": 2.538139581680298, "learning_rate": 2.0015172413793102e-05, "loss": 0.4295, "step": 4840 }, { "epoch": 16.693103448275863, "grad_norm": 0.7055387496948242, "learning_rate": 2.0019310344827588e-05, "loss": 0.3761, "step": 4841 }, { "epoch": 16.69655172413793, "grad_norm": 0.6238194108009338, "learning_rate": 2.002344827586207e-05, "loss": 0.3086, "step": 4842 }, { "epoch": 16.7, "grad_norm": 1.0187398195266724, "learning_rate": 2.0027586206896552e-05, "loss": 0.3241, "step": 4843 }, { "epoch": 16.70344827586207, "grad_norm": 0.926605224609375, "learning_rate": 2.0031724137931034e-05, "loss": 0.2811, "step": 4844 }, { "epoch": 16.70689655172414, "grad_norm": 0.738196611404419, "learning_rate": 2.003586206896552e-05, "loss": 0.3309, "step": 4845 }, { "epoch": 16.71034482758621, "grad_norm": 0.8137768507003784, "learning_rate": 2.004e-05, "loss": 0.2643, "step": 4846 }, { "epoch": 16.713793103448275, "grad_norm": 0.667001485824585, "learning_rate": 2.0044137931034483e-05, "loss": 0.299, "step": 4847 }, { "epoch": 16.717241379310344, "grad_norm": 0.8153502345085144, "learning_rate": 2.0048275862068965e-05, "loss": 0.3104, "step": 4848 }, { "epoch": 16.720689655172414, "grad_norm": 0.8777053952217102, "learning_rate": 2.005241379310345e-05, "loss": 0.2628, "step": 4849 }, { "epoch": 16.724137931034484, "grad_norm": 0.8080638647079468, "learning_rate": 2.0056551724137933e-05, "loss": 0.2813, "step": 4850 }, { "epoch": 16.72758620689655, "grad_norm": 1.6035438776016235, "learning_rate": 2.006068965517241e-05, "loss": 0.2841, "step": 4851 }, { "epoch": 16.73103448275862, "grad_norm": 1.1488033533096313, "learning_rate": 2.0064827586206897e-05, "loss": 0.2787, "step": 4852 }, { "epoch": 16.73448275862069, "grad_norm": 1.0301132202148438, "learning_rate": 2.006896551724138e-05, "loss": 0.2942, "step": 4853 }, { "epoch": 16.73793103448276, "grad_norm": 0.8390212059020996, "learning_rate": 2.0073103448275864e-05, "loss": 0.3025, "step": 4854 }, { "epoch": 16.74137931034483, "grad_norm": 0.9131553769111633, "learning_rate": 2.0077241379310343e-05, "loss": 0.2961, "step": 4855 }, { "epoch": 16.744827586206895, "grad_norm": 0.7609813809394836, "learning_rate": 2.0081379310344828e-05, "loss": 0.2954, "step": 4856 }, { "epoch": 16.748275862068965, "grad_norm": 1.3211098909378052, "learning_rate": 2.008551724137931e-05, "loss": 0.306, "step": 4857 }, { "epoch": 16.751724137931035, "grad_norm": 0.8869441747665405, "learning_rate": 2.0089655172413796e-05, "loss": 0.2978, "step": 4858 }, { "epoch": 16.755172413793105, "grad_norm": 0.7586424946784973, "learning_rate": 2.0093793103448278e-05, "loss": 0.2593, "step": 4859 }, { "epoch": 16.75862068965517, "grad_norm": 1.23377525806427, "learning_rate": 2.009793103448276e-05, "loss": 0.3019, "step": 4860 }, { "epoch": 16.76206896551724, "grad_norm": 3.0184569358825684, "learning_rate": 2.010206896551724e-05, "loss": 0.3198, "step": 4861 }, { "epoch": 16.76551724137931, "grad_norm": 1.3292797803878784, "learning_rate": 2.0106206896551724e-05, "loss": 0.3182, "step": 4862 }, { "epoch": 16.76896551724138, "grad_norm": 1.7066706418991089, "learning_rate": 2.011034482758621e-05, "loss": 0.3339, "step": 4863 }, { "epoch": 16.77241379310345, "grad_norm": 1.049187183380127, "learning_rate": 2.0114482758620688e-05, "loss": 0.3393, "step": 4864 }, { "epoch": 16.775862068965516, "grad_norm": 1.5776690244674683, "learning_rate": 2.0118620689655173e-05, "loss": 0.4847, "step": 4865 }, { "epoch": 16.779310344827586, "grad_norm": 0.6124390959739685, "learning_rate": 2.0122758620689655e-05, "loss": 0.3202, "step": 4866 }, { "epoch": 16.782758620689656, "grad_norm": 0.6182342767715454, "learning_rate": 2.012689655172414e-05, "loss": 0.3013, "step": 4867 }, { "epoch": 16.786206896551725, "grad_norm": 0.5936872363090515, "learning_rate": 2.013103448275862e-05, "loss": 0.319, "step": 4868 }, { "epoch": 16.78965517241379, "grad_norm": 0.9565755128860474, "learning_rate": 2.0135172413793105e-05, "loss": 0.3018, "step": 4869 }, { "epoch": 16.79310344827586, "grad_norm": 0.6215330958366394, "learning_rate": 2.0139310344827587e-05, "loss": 0.3109, "step": 4870 }, { "epoch": 16.79655172413793, "grad_norm": 0.5954663157463074, "learning_rate": 2.0143448275862072e-05, "loss": 0.308, "step": 4871 }, { "epoch": 16.8, "grad_norm": 0.9212254881858826, "learning_rate": 2.014758620689655e-05, "loss": 0.32, "step": 4872 }, { "epoch": 16.80344827586207, "grad_norm": 0.5836259722709656, "learning_rate": 2.0151724137931036e-05, "loss": 0.2765, "step": 4873 }, { "epoch": 16.806896551724137, "grad_norm": 0.9411658048629761, "learning_rate": 2.0155862068965518e-05, "loss": 0.2831, "step": 4874 }, { "epoch": 16.810344827586206, "grad_norm": 0.9625186920166016, "learning_rate": 2.016e-05, "loss": 0.2973, "step": 4875 }, { "epoch": 16.813793103448276, "grad_norm": 0.5602714419364929, "learning_rate": 2.0164137931034482e-05, "loss": 0.2729, "step": 4876 }, { "epoch": 16.817241379310346, "grad_norm": 1.0143122673034668, "learning_rate": 2.0168275862068964e-05, "loss": 0.2969, "step": 4877 }, { "epoch": 16.820689655172412, "grad_norm": 0.7735527753829956, "learning_rate": 2.017241379310345e-05, "loss": 0.268, "step": 4878 }, { "epoch": 16.824137931034482, "grad_norm": 0.824016809463501, "learning_rate": 2.017655172413793e-05, "loss": 0.2778, "step": 4879 }, { "epoch": 16.82758620689655, "grad_norm": 0.991077184677124, "learning_rate": 2.0180689655172414e-05, "loss": 0.2621, "step": 4880 }, { "epoch": 16.83103448275862, "grad_norm": 1.4437633752822876, "learning_rate": 2.0184827586206896e-05, "loss": 0.2977, "step": 4881 }, { "epoch": 16.83448275862069, "grad_norm": 1.05059015750885, "learning_rate": 2.018896551724138e-05, "loss": 0.2826, "step": 4882 }, { "epoch": 16.837931034482757, "grad_norm": 1.3206532001495361, "learning_rate": 2.0193103448275863e-05, "loss": 0.2962, "step": 4883 }, { "epoch": 16.841379310344827, "grad_norm": 1.0990300178527832, "learning_rate": 2.0197241379310345e-05, "loss": 0.277, "step": 4884 }, { "epoch": 16.844827586206897, "grad_norm": 1.6725906133651733, "learning_rate": 2.0201379310344827e-05, "loss": 0.278, "step": 4885 }, { "epoch": 16.848275862068967, "grad_norm": 1.287606954574585, "learning_rate": 2.0205517241379312e-05, "loss": 0.3247, "step": 4886 }, { "epoch": 16.851724137931036, "grad_norm": 1.8106781244277954, "learning_rate": 2.0209655172413794e-05, "loss": 0.3264, "step": 4887 }, { "epoch": 16.855172413793102, "grad_norm": 0.8836897611618042, "learning_rate": 2.0213793103448273e-05, "loss": 0.3039, "step": 4888 }, { "epoch": 16.858620689655172, "grad_norm": 1.1643481254577637, "learning_rate": 2.021793103448276e-05, "loss": 0.319, "step": 4889 }, { "epoch": 16.862068965517242, "grad_norm": 1.6230722665786743, "learning_rate": 2.022206896551724e-05, "loss": 0.4524, "step": 4890 }, { "epoch": 16.86551724137931, "grad_norm": 0.6875162720680237, "learning_rate": 2.0226206896551726e-05, "loss": 0.3236, "step": 4891 }, { "epoch": 16.868965517241378, "grad_norm": 1.1458731889724731, "learning_rate": 2.0230344827586208e-05, "loss": 0.3032, "step": 4892 }, { "epoch": 16.872413793103448, "grad_norm": 2.3186087608337402, "learning_rate": 2.023448275862069e-05, "loss": 0.2925, "step": 4893 }, { "epoch": 16.875862068965517, "grad_norm": 0.8002782464027405, "learning_rate": 2.0238620689655172e-05, "loss": 0.2744, "step": 4894 }, { "epoch": 16.879310344827587, "grad_norm": 0.8404428958892822, "learning_rate": 2.0242758620689657e-05, "loss": 0.3005, "step": 4895 }, { "epoch": 16.882758620689657, "grad_norm": 0.7402262687683105, "learning_rate": 2.024689655172414e-05, "loss": 0.276, "step": 4896 }, { "epoch": 16.886206896551723, "grad_norm": 1.4287992715835571, "learning_rate": 2.025103448275862e-05, "loss": 0.2934, "step": 4897 }, { "epoch": 16.889655172413793, "grad_norm": 0.6340870261192322, "learning_rate": 2.0255172413793103e-05, "loss": 0.2887, "step": 4898 }, { "epoch": 16.893103448275863, "grad_norm": 0.6130897998809814, "learning_rate": 2.025931034482759e-05, "loss": 0.293, "step": 4899 }, { "epoch": 16.896551724137932, "grad_norm": 0.6283670663833618, "learning_rate": 2.026344827586207e-05, "loss": 0.2687, "step": 4900 }, { "epoch": 16.9, "grad_norm": 1.0830202102661133, "learning_rate": 2.026758620689655e-05, "loss": 0.3075, "step": 4901 }, { "epoch": 16.90344827586207, "grad_norm": 1.2390114068984985, "learning_rate": 2.0271724137931035e-05, "loss": 0.2971, "step": 4902 }, { "epoch": 16.906896551724138, "grad_norm": 2.059058666229248, "learning_rate": 2.0275862068965517e-05, "loss": 0.2661, "step": 4903 }, { "epoch": 16.910344827586208, "grad_norm": 0.6985297799110413, "learning_rate": 2.0280000000000002e-05, "loss": 0.2683, "step": 4904 }, { "epoch": 16.913793103448278, "grad_norm": 0.920785665512085, "learning_rate": 2.028413793103448e-05, "loss": 0.2752, "step": 4905 }, { "epoch": 16.917241379310344, "grad_norm": 1.7310316562652588, "learning_rate": 2.0288275862068966e-05, "loss": 0.2644, "step": 4906 }, { "epoch": 16.920689655172414, "grad_norm": 1.2103142738342285, "learning_rate": 2.029241379310345e-05, "loss": 0.277, "step": 4907 }, { "epoch": 16.924137931034483, "grad_norm": 0.8169885873794556, "learning_rate": 2.0296551724137934e-05, "loss": 0.2788, "step": 4908 }, { "epoch": 16.927586206896553, "grad_norm": 0.7345200181007385, "learning_rate": 2.0300689655172412e-05, "loss": 0.2774, "step": 4909 }, { "epoch": 16.93103448275862, "grad_norm": 1.4633848667144775, "learning_rate": 2.0304827586206898e-05, "loss": 0.2887, "step": 4910 }, { "epoch": 16.93448275862069, "grad_norm": 0.83945631980896, "learning_rate": 2.030896551724138e-05, "loss": 0.2546, "step": 4911 }, { "epoch": 16.93793103448276, "grad_norm": 1.054445743560791, "learning_rate": 2.0313103448275865e-05, "loss": 0.2944, "step": 4912 }, { "epoch": 16.94137931034483, "grad_norm": 1.4270107746124268, "learning_rate": 2.0317241379310344e-05, "loss": 0.3256, "step": 4913 }, { "epoch": 16.944827586206898, "grad_norm": 1.8172180652618408, "learning_rate": 2.0321379310344826e-05, "loss": 0.3993, "step": 4914 }, { "epoch": 16.948275862068964, "grad_norm": 3.0614306926727295, "learning_rate": 2.032551724137931e-05, "loss": 0.4132, "step": 4915 }, { "epoch": 16.951724137931034, "grad_norm": 0.6290459036827087, "learning_rate": 2.0329655172413793e-05, "loss": 0.3457, "step": 4916 }, { "epoch": 16.955172413793104, "grad_norm": 0.8515801429748535, "learning_rate": 2.0333793103448275e-05, "loss": 0.3345, "step": 4917 }, { "epoch": 16.958620689655174, "grad_norm": 0.9860391616821289, "learning_rate": 2.0337931034482757e-05, "loss": 0.2749, "step": 4918 }, { "epoch": 16.96206896551724, "grad_norm": 1.324578046798706, "learning_rate": 2.0342068965517243e-05, "loss": 0.266, "step": 4919 }, { "epoch": 16.96551724137931, "grad_norm": 0.7015954256057739, "learning_rate": 2.0346206896551725e-05, "loss": 0.297, "step": 4920 }, { "epoch": 16.96896551724138, "grad_norm": 1.3631787300109863, "learning_rate": 2.0350344827586207e-05, "loss": 0.2452, "step": 4921 }, { "epoch": 16.97241379310345, "grad_norm": 0.7120394706726074, "learning_rate": 2.035448275862069e-05, "loss": 0.3032, "step": 4922 }, { "epoch": 16.97586206896552, "grad_norm": 0.9055486917495728, "learning_rate": 2.0358620689655174e-05, "loss": 0.2878, "step": 4923 }, { "epoch": 16.979310344827585, "grad_norm": 1.2476742267608643, "learning_rate": 2.0362758620689656e-05, "loss": 0.2888, "step": 4924 }, { "epoch": 16.982758620689655, "grad_norm": 1.336132526397705, "learning_rate": 2.0366896551724138e-05, "loss": 0.3012, "step": 4925 }, { "epoch": 16.986206896551725, "grad_norm": 1.0465649366378784, "learning_rate": 2.037103448275862e-05, "loss": 0.2682, "step": 4926 }, { "epoch": 16.989655172413794, "grad_norm": 0.9397979974746704, "learning_rate": 2.0375172413793102e-05, "loss": 0.2757, "step": 4927 }, { "epoch": 16.99310344827586, "grad_norm": 0.8677775263786316, "learning_rate": 2.0379310344827588e-05, "loss": 0.323, "step": 4928 }, { "epoch": 16.99655172413793, "grad_norm": 1.1310428380966187, "learning_rate": 2.038344827586207e-05, "loss": 0.3386, "step": 4929 }, { "epoch": 17.0, "grad_norm": 1.9511315822601318, "learning_rate": 2.0387586206896552e-05, "loss": 0.3994, "step": 4930 }, { "epoch": 17.00344827586207, "grad_norm": 0.9153809547424316, "learning_rate": 2.0391724137931034e-05, "loss": 0.3288, "step": 4931 }, { "epoch": 17.00689655172414, "grad_norm": 1.1267868280410767, "learning_rate": 2.039586206896552e-05, "loss": 0.2964, "step": 4932 }, { "epoch": 17.010344827586206, "grad_norm": 0.9805169105529785, "learning_rate": 2.04e-05, "loss": 0.2966, "step": 4933 }, { "epoch": 17.013793103448275, "grad_norm": 1.0294607877731323, "learning_rate": 2.0404137931034483e-05, "loss": 0.2889, "step": 4934 }, { "epoch": 17.017241379310345, "grad_norm": 0.5088774561882019, "learning_rate": 2.0408275862068965e-05, "loss": 0.275, "step": 4935 }, { "epoch": 17.020689655172415, "grad_norm": 0.5994563102722168, "learning_rate": 2.041241379310345e-05, "loss": 0.2654, "step": 4936 }, { "epoch": 17.02413793103448, "grad_norm": 0.6954531669616699, "learning_rate": 2.0416551724137933e-05, "loss": 0.3015, "step": 4937 }, { "epoch": 17.02758620689655, "grad_norm": 0.9019528031349182, "learning_rate": 2.0420689655172415e-05, "loss": 0.2666, "step": 4938 }, { "epoch": 17.03103448275862, "grad_norm": 0.6215035915374756, "learning_rate": 2.0424827586206897e-05, "loss": 0.3046, "step": 4939 }, { "epoch": 17.03448275862069, "grad_norm": 0.685732364654541, "learning_rate": 2.042896551724138e-05, "loss": 0.2634, "step": 4940 }, { "epoch": 17.03793103448276, "grad_norm": 1.0724700689315796, "learning_rate": 2.0433103448275864e-05, "loss": 0.2994, "step": 4941 }, { "epoch": 17.041379310344826, "grad_norm": 0.9718945026397705, "learning_rate": 2.0437241379310343e-05, "loss": 0.2953, "step": 4942 }, { "epoch": 17.044827586206896, "grad_norm": 0.592540442943573, "learning_rate": 2.0441379310344828e-05, "loss": 0.2666, "step": 4943 }, { "epoch": 17.048275862068966, "grad_norm": 0.7220354676246643, "learning_rate": 2.044551724137931e-05, "loss": 0.2751, "step": 4944 }, { "epoch": 17.051724137931036, "grad_norm": 1.2280561923980713, "learning_rate": 2.0449655172413796e-05, "loss": 0.2935, "step": 4945 }, { "epoch": 17.055172413793102, "grad_norm": 2.2479653358459473, "learning_rate": 2.0453793103448274e-05, "loss": 0.2748, "step": 4946 }, { "epoch": 17.05862068965517, "grad_norm": 0.7363001704216003, "learning_rate": 2.045793103448276e-05, "loss": 0.2734, "step": 4947 }, { "epoch": 17.06206896551724, "grad_norm": 1.146575689315796, "learning_rate": 2.046206896551724e-05, "loss": 0.2916, "step": 4948 }, { "epoch": 17.06551724137931, "grad_norm": 1.2175182104110718, "learning_rate": 2.0466206896551727e-05, "loss": 0.2935, "step": 4949 }, { "epoch": 17.06896551724138, "grad_norm": 0.7946876287460327, "learning_rate": 2.0470344827586206e-05, "loss": 0.2618, "step": 4950 }, { "epoch": 17.072413793103447, "grad_norm": 0.9051012396812439, "learning_rate": 2.047448275862069e-05, "loss": 0.3192, "step": 4951 }, { "epoch": 17.075862068965517, "grad_norm": 1.1981604099273682, "learning_rate": 2.0478620689655173e-05, "loss": 0.2657, "step": 4952 }, { "epoch": 17.079310344827586, "grad_norm": 1.5842175483703613, "learning_rate": 2.0482758620689655e-05, "loss": 0.279, "step": 4953 }, { "epoch": 17.082758620689656, "grad_norm": 1.557660460472107, "learning_rate": 2.0486896551724137e-05, "loss": 0.3469, "step": 4954 }, { "epoch": 17.086206896551722, "grad_norm": 1.4971609115600586, "learning_rate": 2.049103448275862e-05, "loss": 0.4386, "step": 4955 }, { "epoch": 17.089655172413792, "grad_norm": 0.7564134001731873, "learning_rate": 2.0495172413793105e-05, "loss": 0.3154, "step": 4956 }, { "epoch": 17.093103448275862, "grad_norm": 1.0123529434204102, "learning_rate": 2.0499310344827587e-05, "loss": 0.2836, "step": 4957 }, { "epoch": 17.09655172413793, "grad_norm": 0.9523440599441528, "learning_rate": 2.050344827586207e-05, "loss": 0.289, "step": 4958 }, { "epoch": 17.1, "grad_norm": 0.7168685793876648, "learning_rate": 2.050758620689655e-05, "loss": 0.3178, "step": 4959 }, { "epoch": 17.103448275862068, "grad_norm": 0.6629805564880371, "learning_rate": 2.0511724137931036e-05, "loss": 0.3047, "step": 4960 }, { "epoch": 17.106896551724137, "grad_norm": 0.9503162503242493, "learning_rate": 2.0515862068965518e-05, "loss": 0.3001, "step": 4961 }, { "epoch": 17.110344827586207, "grad_norm": 0.6022122502326965, "learning_rate": 2.0520000000000003e-05, "loss": 0.3089, "step": 4962 }, { "epoch": 17.113793103448277, "grad_norm": 0.6812213659286499, "learning_rate": 2.0524137931034482e-05, "loss": 0.2679, "step": 4963 }, { "epoch": 17.117241379310343, "grad_norm": 0.7191502451896667, "learning_rate": 2.0528275862068967e-05, "loss": 0.2906, "step": 4964 }, { "epoch": 17.120689655172413, "grad_norm": 1.6372315883636475, "learning_rate": 2.053241379310345e-05, "loss": 0.2615, "step": 4965 }, { "epoch": 17.124137931034483, "grad_norm": 0.8522419333457947, "learning_rate": 2.053655172413793e-05, "loss": 0.2386, "step": 4966 }, { "epoch": 17.127586206896552, "grad_norm": 1.1941108703613281, "learning_rate": 2.0540689655172414e-05, "loss": 0.2971, "step": 4967 }, { "epoch": 17.131034482758622, "grad_norm": 0.8363252282142639, "learning_rate": 2.0544827586206896e-05, "loss": 0.2877, "step": 4968 }, { "epoch": 17.13448275862069, "grad_norm": 1.0038135051727295, "learning_rate": 2.054896551724138e-05, "loss": 0.2787, "step": 4969 }, { "epoch": 17.137931034482758, "grad_norm": 1.1378589868545532, "learning_rate": 2.0553103448275863e-05, "loss": 0.2697, "step": 4970 }, { "epoch": 17.141379310344828, "grad_norm": 0.8250481486320496, "learning_rate": 2.0557241379310345e-05, "loss": 0.2851, "step": 4971 }, { "epoch": 17.144827586206898, "grad_norm": 0.8223645091056824, "learning_rate": 2.0561379310344827e-05, "loss": 0.2748, "step": 4972 }, { "epoch": 17.148275862068967, "grad_norm": 1.3656038045883179, "learning_rate": 2.0565517241379312e-05, "loss": 0.2683, "step": 4973 }, { "epoch": 17.151724137931033, "grad_norm": 1.40646231174469, "learning_rate": 2.0569655172413794e-05, "loss": 0.2791, "step": 4974 }, { "epoch": 17.155172413793103, "grad_norm": 0.8144022822380066, "learning_rate": 2.0573793103448276e-05, "loss": 0.2582, "step": 4975 }, { "epoch": 17.158620689655173, "grad_norm": 0.9287348389625549, "learning_rate": 2.057793103448276e-05, "loss": 0.2973, "step": 4976 }, { "epoch": 17.162068965517243, "grad_norm": 5.655907154083252, "learning_rate": 2.0582068965517244e-05, "loss": 0.2858, "step": 4977 }, { "epoch": 17.16551724137931, "grad_norm": 1.2036292552947998, "learning_rate": 2.0586206896551726e-05, "loss": 0.2852, "step": 4978 }, { "epoch": 17.16896551724138, "grad_norm": 2.7995100021362305, "learning_rate": 2.0590344827586205e-05, "loss": 0.2918, "step": 4979 }, { "epoch": 17.17241379310345, "grad_norm": 2.6090476512908936, "learning_rate": 2.059448275862069e-05, "loss": 0.3962, "step": 4980 }, { "epoch": 17.175862068965518, "grad_norm": 1.1043860912322998, "learning_rate": 2.0598620689655172e-05, "loss": 0.3323, "step": 4981 }, { "epoch": 17.179310344827588, "grad_norm": 0.7934916615486145, "learning_rate": 2.0602758620689657e-05, "loss": 0.3096, "step": 4982 }, { "epoch": 17.182758620689654, "grad_norm": 1.5112491846084595, "learning_rate": 2.0606896551724136e-05, "loss": 0.302, "step": 4983 }, { "epoch": 17.186206896551724, "grad_norm": 1.2126020193099976, "learning_rate": 2.061103448275862e-05, "loss": 0.3051, "step": 4984 }, { "epoch": 17.189655172413794, "grad_norm": 0.9292972087860107, "learning_rate": 2.0615172413793103e-05, "loss": 0.2847, "step": 4985 }, { "epoch": 17.193103448275863, "grad_norm": 2.8141841888427734, "learning_rate": 2.061931034482759e-05, "loss": 0.2813, "step": 4986 }, { "epoch": 17.19655172413793, "grad_norm": 0.8425857424736023, "learning_rate": 2.0623448275862067e-05, "loss": 0.2739, "step": 4987 }, { "epoch": 17.2, "grad_norm": 0.8219760060310364, "learning_rate": 2.0627586206896553e-05, "loss": 0.2963, "step": 4988 }, { "epoch": 17.20344827586207, "grad_norm": 0.6916044354438782, "learning_rate": 2.0631724137931035e-05, "loss": 0.2597, "step": 4989 }, { "epoch": 17.20689655172414, "grad_norm": 0.7236573696136475, "learning_rate": 2.063586206896552e-05, "loss": 0.3003, "step": 4990 }, { "epoch": 17.21034482758621, "grad_norm": 0.9043285250663757, "learning_rate": 2.064e-05, "loss": 0.2958, "step": 4991 }, { "epoch": 17.213793103448275, "grad_norm": 0.9290730953216553, "learning_rate": 2.064413793103448e-05, "loss": 0.3148, "step": 4992 }, { "epoch": 17.217241379310344, "grad_norm": 0.7600983381271362, "learning_rate": 2.0648275862068966e-05, "loss": 0.2816, "step": 4993 }, { "epoch": 17.220689655172414, "grad_norm": 0.9191033840179443, "learning_rate": 2.065241379310345e-05, "loss": 0.3155, "step": 4994 }, { "epoch": 17.224137931034484, "grad_norm": 0.7441519498825073, "learning_rate": 2.0656551724137934e-05, "loss": 0.2601, "step": 4995 }, { "epoch": 17.22758620689655, "grad_norm": 0.7191734910011292, "learning_rate": 2.0660689655172412e-05, "loss": 0.2629, "step": 4996 }, { "epoch": 17.23103448275862, "grad_norm": 0.7486431002616882, "learning_rate": 2.0664827586206898e-05, "loss": 0.2994, "step": 4997 }, { "epoch": 17.23448275862069, "grad_norm": 1.031962513923645, "learning_rate": 2.066896551724138e-05, "loss": 0.3074, "step": 4998 }, { "epoch": 17.23793103448276, "grad_norm": 0.851092517375946, "learning_rate": 2.0673103448275865e-05, "loss": 0.2779, "step": 4999 }, { "epoch": 17.24137931034483, "grad_norm": 0.8814362287521362, "learning_rate": 2.0677241379310344e-05, "loss": 0.2602, "step": 5000 }, { "epoch": 17.24137931034483, "eval_cer": 0.13660145880048455, "eval_loss": 0.3230190575122833, "eval_runtime": 17.9137, "eval_samples_per_second": 51.748, "eval_steps_per_second": 0.167, "eval_wer": 0.32518115942028986, "step": 5000 }, { "epoch": 17.244827586206895, "grad_norm": 1.3136608600616455, "learning_rate": 2.068137931034483e-05, "loss": 0.2977, "step": 5001 }, { "epoch": 17.248275862068965, "grad_norm": 0.931272566318512, "learning_rate": 2.068551724137931e-05, "loss": 0.2754, "step": 5002 }, { "epoch": 17.251724137931035, "grad_norm": 1.28900945186615, "learning_rate": 2.0689655172413797e-05, "loss": 0.3099, "step": 5003 }, { "epoch": 17.255172413793105, "grad_norm": 1.6249669790267944, "learning_rate": 2.0693793103448275e-05, "loss": 0.3509, "step": 5004 }, { "epoch": 17.25862068965517, "grad_norm": 2.436079502105713, "learning_rate": 2.0697931034482757e-05, "loss": 0.4293, "step": 5005 }, { "epoch": 17.26206896551724, "grad_norm": 0.8039688467979431, "learning_rate": 2.0702068965517243e-05, "loss": 0.3354, "step": 5006 }, { "epoch": 17.26551724137931, "grad_norm": 0.8906108140945435, "learning_rate": 2.0706206896551725e-05, "loss": 0.3118, "step": 5007 }, { "epoch": 17.26896551724138, "grad_norm": 0.7317248582839966, "learning_rate": 2.0710344827586207e-05, "loss": 0.2809, "step": 5008 }, { "epoch": 17.27241379310345, "grad_norm": 0.8668102622032166, "learning_rate": 2.071448275862069e-05, "loss": 0.2967, "step": 5009 }, { "epoch": 17.275862068965516, "grad_norm": 1.249123454093933, "learning_rate": 2.0718620689655174e-05, "loss": 0.2994, "step": 5010 }, { "epoch": 17.279310344827586, "grad_norm": 0.7263730764389038, "learning_rate": 2.0722758620689656e-05, "loss": 0.2739, "step": 5011 }, { "epoch": 17.282758620689656, "grad_norm": 1.0139468908309937, "learning_rate": 2.0726896551724138e-05, "loss": 0.2812, "step": 5012 }, { "epoch": 17.286206896551725, "grad_norm": 0.9329712390899658, "learning_rate": 2.073103448275862e-05, "loss": 0.305, "step": 5013 }, { "epoch": 17.28965517241379, "grad_norm": 0.7404047250747681, "learning_rate": 2.0735172413793106e-05, "loss": 0.2637, "step": 5014 }, { "epoch": 17.29310344827586, "grad_norm": 0.8364859223365784, "learning_rate": 2.0739310344827588e-05, "loss": 0.2628, "step": 5015 }, { "epoch": 17.29655172413793, "grad_norm": 0.7697840332984924, "learning_rate": 2.074344827586207e-05, "loss": 0.2817, "step": 5016 }, { "epoch": 17.3, "grad_norm": 0.8096581697463989, "learning_rate": 2.074758620689655e-05, "loss": 0.2648, "step": 5017 }, { "epoch": 17.30344827586207, "grad_norm": 1.223091959953308, "learning_rate": 2.0751724137931034e-05, "loss": 0.2349, "step": 5018 }, { "epoch": 17.306896551724137, "grad_norm": 1.1299622058868408, "learning_rate": 2.075586206896552e-05, "loss": 0.2692, "step": 5019 }, { "epoch": 17.310344827586206, "grad_norm": 0.8042427897453308, "learning_rate": 2.0759999999999998e-05, "loss": 0.2801, "step": 5020 }, { "epoch": 17.313793103448276, "grad_norm": 0.8748602271080017, "learning_rate": 2.0764137931034483e-05, "loss": 0.2855, "step": 5021 }, { "epoch": 17.317241379310346, "grad_norm": 0.9450084567070007, "learning_rate": 2.0768275862068965e-05, "loss": 0.2874, "step": 5022 }, { "epoch": 17.320689655172412, "grad_norm": 0.7708240151405334, "learning_rate": 2.077241379310345e-05, "loss": 0.2952, "step": 5023 }, { "epoch": 17.324137931034482, "grad_norm": 0.9248443245887756, "learning_rate": 2.077655172413793e-05, "loss": 0.2643, "step": 5024 }, { "epoch": 17.32758620689655, "grad_norm": 0.9934765696525574, "learning_rate": 2.0780689655172415e-05, "loss": 0.2854, "step": 5025 }, { "epoch": 17.33103448275862, "grad_norm": 0.9617242813110352, "learning_rate": 2.0784827586206897e-05, "loss": 0.2972, "step": 5026 }, { "epoch": 17.33448275862069, "grad_norm": 1.152195930480957, "learning_rate": 2.0788965517241382e-05, "loss": 0.2721, "step": 5027 }, { "epoch": 17.337931034482757, "grad_norm": 1.1710947751998901, "learning_rate": 2.0793103448275864e-05, "loss": 0.3003, "step": 5028 }, { "epoch": 17.341379310344827, "grad_norm": 1.9777209758758545, "learning_rate": 2.0797241379310346e-05, "loss": 0.3028, "step": 5029 }, { "epoch": 17.344827586206897, "grad_norm": 8.458918571472168, "learning_rate": 2.0801379310344828e-05, "loss": 0.4023, "step": 5030 }, { "epoch": 17.348275862068967, "grad_norm": 0.8353826403617859, "learning_rate": 2.080551724137931e-05, "loss": 0.31, "step": 5031 }, { "epoch": 17.351724137931033, "grad_norm": 0.9156121015548706, "learning_rate": 2.0809655172413796e-05, "loss": 0.3192, "step": 5032 }, { "epoch": 17.355172413793102, "grad_norm": 0.8592126965522766, "learning_rate": 2.0813793103448274e-05, "loss": 0.3019, "step": 5033 }, { "epoch": 17.358620689655172, "grad_norm": 0.9841328859329224, "learning_rate": 2.081793103448276e-05, "loss": 0.3125, "step": 5034 }, { "epoch": 17.362068965517242, "grad_norm": 1.0010229349136353, "learning_rate": 2.082206896551724e-05, "loss": 0.2768, "step": 5035 }, { "epoch": 17.36551724137931, "grad_norm": 0.7761617302894592, "learning_rate": 2.0826206896551727e-05, "loss": 0.2705, "step": 5036 }, { "epoch": 17.368965517241378, "grad_norm": 1.1603385210037231, "learning_rate": 2.0830344827586206e-05, "loss": 0.2958, "step": 5037 }, { "epoch": 17.372413793103448, "grad_norm": 0.7986253499984741, "learning_rate": 2.083448275862069e-05, "loss": 0.2941, "step": 5038 }, { "epoch": 17.375862068965517, "grad_norm": 0.714255154132843, "learning_rate": 2.0838620689655173e-05, "loss": 0.2765, "step": 5039 }, { "epoch": 17.379310344827587, "grad_norm": 0.6936031579971313, "learning_rate": 2.084275862068966e-05, "loss": 0.2944, "step": 5040 }, { "epoch": 17.382758620689657, "grad_norm": 0.668982982635498, "learning_rate": 2.0846896551724137e-05, "loss": 0.2693, "step": 5041 }, { "epoch": 17.386206896551723, "grad_norm": 0.8056187629699707, "learning_rate": 2.0851034482758622e-05, "loss": 0.296, "step": 5042 }, { "epoch": 17.389655172413793, "grad_norm": 0.918462336063385, "learning_rate": 2.0855172413793104e-05, "loss": 0.2654, "step": 5043 }, { "epoch": 17.393103448275863, "grad_norm": 0.7142491340637207, "learning_rate": 2.0859310344827587e-05, "loss": 0.3249, "step": 5044 }, { "epoch": 17.396551724137932, "grad_norm": 4.139335632324219, "learning_rate": 2.086344827586207e-05, "loss": 0.2741, "step": 5045 }, { "epoch": 17.4, "grad_norm": 0.9472634196281433, "learning_rate": 2.086758620689655e-05, "loss": 0.2582, "step": 5046 }, { "epoch": 17.40344827586207, "grad_norm": 1.2471935749053955, "learning_rate": 2.0871724137931036e-05, "loss": 0.2779, "step": 5047 }, { "epoch": 17.406896551724138, "grad_norm": 0.7951177954673767, "learning_rate": 2.0875862068965518e-05, "loss": 0.272, "step": 5048 }, { "epoch": 17.410344827586208, "grad_norm": 0.9206835627555847, "learning_rate": 2.088e-05, "loss": 0.2831, "step": 5049 }, { "epoch": 17.413793103448278, "grad_norm": 0.888474702835083, "learning_rate": 2.0884137931034482e-05, "loss": 0.2848, "step": 5050 }, { "epoch": 17.417241379310344, "grad_norm": 0.9717963337898254, "learning_rate": 2.0888275862068967e-05, "loss": 0.2882, "step": 5051 }, { "epoch": 17.420689655172414, "grad_norm": 0.8693985939025879, "learning_rate": 2.089241379310345e-05, "loss": 0.2697, "step": 5052 }, { "epoch": 17.424137931034483, "grad_norm": 0.8633184432983398, "learning_rate": 2.089655172413793e-05, "loss": 0.2861, "step": 5053 }, { "epoch": 17.427586206896553, "grad_norm": 0.9718332290649414, "learning_rate": 2.0900689655172413e-05, "loss": 0.2682, "step": 5054 }, { "epoch": 17.43103448275862, "grad_norm": 1.5043208599090576, "learning_rate": 2.0904827586206895e-05, "loss": 0.3635, "step": 5055 }, { "epoch": 17.43448275862069, "grad_norm": 0.6122180819511414, "learning_rate": 2.090896551724138e-05, "loss": 0.3268, "step": 5056 }, { "epoch": 17.43793103448276, "grad_norm": 0.73866868019104, "learning_rate": 2.091310344827586e-05, "loss": 0.2832, "step": 5057 }, { "epoch": 17.44137931034483, "grad_norm": 0.8595353364944458, "learning_rate": 2.0917241379310345e-05, "loss": 0.2948, "step": 5058 }, { "epoch": 17.444827586206898, "grad_norm": 0.7852151989936829, "learning_rate": 2.0921379310344827e-05, "loss": 0.2914, "step": 5059 }, { "epoch": 17.448275862068964, "grad_norm": 0.9317717552185059, "learning_rate": 2.0925517241379312e-05, "loss": 0.296, "step": 5060 }, { "epoch": 17.451724137931034, "grad_norm": 1.8011014461517334, "learning_rate": 2.0929655172413794e-05, "loss": 0.3004, "step": 5061 }, { "epoch": 17.455172413793104, "grad_norm": 0.7231969237327576, "learning_rate": 2.0933793103448276e-05, "loss": 0.2923, "step": 5062 }, { "epoch": 17.458620689655174, "grad_norm": 0.7520900964736938, "learning_rate": 2.093793103448276e-05, "loss": 0.2939, "step": 5063 }, { "epoch": 17.46206896551724, "grad_norm": 1.2665752172470093, "learning_rate": 2.0942068965517244e-05, "loss": 0.2874, "step": 5064 }, { "epoch": 17.46551724137931, "grad_norm": 0.5789496302604675, "learning_rate": 2.0946206896551726e-05, "loss": 0.2503, "step": 5065 }, { "epoch": 17.46896551724138, "grad_norm": 1.1549001932144165, "learning_rate": 2.0950344827586208e-05, "loss": 0.2678, "step": 5066 }, { "epoch": 17.47241379310345, "grad_norm": 1.2810791730880737, "learning_rate": 2.095448275862069e-05, "loss": 0.2852, "step": 5067 }, { "epoch": 17.47586206896552, "grad_norm": 0.9856975674629211, "learning_rate": 2.0958620689655172e-05, "loss": 0.2627, "step": 5068 }, { "epoch": 17.479310344827585, "grad_norm": 0.9149376153945923, "learning_rate": 2.0962758620689657e-05, "loss": 0.2475, "step": 5069 }, { "epoch": 17.482758620689655, "grad_norm": 0.842220664024353, "learning_rate": 2.0966896551724136e-05, "loss": 0.2821, "step": 5070 }, { "epoch": 17.486206896551725, "grad_norm": 1.022969126701355, "learning_rate": 2.097103448275862e-05, "loss": 0.2644, "step": 5071 }, { "epoch": 17.489655172413794, "grad_norm": 2.152315616607666, "learning_rate": 2.0975172413793103e-05, "loss": 0.3042, "step": 5072 }, { "epoch": 17.49310344827586, "grad_norm": 0.8093752264976501, "learning_rate": 2.097931034482759e-05, "loss": 0.2795, "step": 5073 }, { "epoch": 17.49655172413793, "grad_norm": 1.2036231756210327, "learning_rate": 2.0983448275862067e-05, "loss": 0.2802, "step": 5074 }, { "epoch": 17.5, "grad_norm": 1.1182337999343872, "learning_rate": 2.0987586206896553e-05, "loss": 0.2836, "step": 5075 }, { "epoch": 17.50344827586207, "grad_norm": 1.6192057132720947, "learning_rate": 2.0991724137931035e-05, "loss": 0.2535, "step": 5076 }, { "epoch": 17.50689655172414, "grad_norm": 0.7996068596839905, "learning_rate": 2.099586206896552e-05, "loss": 0.2674, "step": 5077 }, { "epoch": 17.510344827586206, "grad_norm": 1.4417608976364136, "learning_rate": 2.1e-05, "loss": 0.3134, "step": 5078 }, { "epoch": 17.513793103448275, "grad_norm": 1.3263025283813477, "learning_rate": 2.1004137931034484e-05, "loss": 0.2865, "step": 5079 }, { "epoch": 17.517241379310345, "grad_norm": 1.3888236284255981, "learning_rate": 2.1008275862068966e-05, "loss": 0.3933, "step": 5080 }, { "epoch": 17.520689655172415, "grad_norm": 0.7129997611045837, "learning_rate": 2.1012413793103448e-05, "loss": 0.3661, "step": 5081 }, { "epoch": 17.52413793103448, "grad_norm": 0.7467970252037048, "learning_rate": 2.101655172413793e-05, "loss": 0.2835, "step": 5082 }, { "epoch": 17.52758620689655, "grad_norm": 0.6564076542854309, "learning_rate": 2.1020689655172412e-05, "loss": 0.2986, "step": 5083 }, { "epoch": 17.53103448275862, "grad_norm": 0.7443508505821228, "learning_rate": 2.1024827586206898e-05, "loss": 0.3085, "step": 5084 }, { "epoch": 17.53448275862069, "grad_norm": 0.6052218079566956, "learning_rate": 2.102896551724138e-05, "loss": 0.2911, "step": 5085 }, { "epoch": 17.53793103448276, "grad_norm": 2.4902100563049316, "learning_rate": 2.1033103448275862e-05, "loss": 0.3152, "step": 5086 }, { "epoch": 17.541379310344826, "grad_norm": 1.1048974990844727, "learning_rate": 2.1037241379310344e-05, "loss": 0.303, "step": 5087 }, { "epoch": 17.544827586206896, "grad_norm": 0.6707825064659119, "learning_rate": 2.104137931034483e-05, "loss": 0.3043, "step": 5088 }, { "epoch": 17.548275862068966, "grad_norm": 0.6965322494506836, "learning_rate": 2.104551724137931e-05, "loss": 0.2858, "step": 5089 }, { "epoch": 17.551724137931036, "grad_norm": 0.8779163360595703, "learning_rate": 2.1049655172413793e-05, "loss": 0.2944, "step": 5090 }, { "epoch": 17.555172413793102, "grad_norm": 1.8278826475143433, "learning_rate": 2.1053793103448275e-05, "loss": 0.2736, "step": 5091 }, { "epoch": 17.55862068965517, "grad_norm": 0.9334878325462341, "learning_rate": 2.105793103448276e-05, "loss": 0.2791, "step": 5092 }, { "epoch": 17.56206896551724, "grad_norm": 0.8937340378761292, "learning_rate": 2.1062068965517243e-05, "loss": 0.2713, "step": 5093 }, { "epoch": 17.56551724137931, "grad_norm": 1.001982569694519, "learning_rate": 2.1066206896551725e-05, "loss": 0.3158, "step": 5094 }, { "epoch": 17.56896551724138, "grad_norm": 1.1404491662979126, "learning_rate": 2.1070344827586207e-05, "loss": 0.31, "step": 5095 }, { "epoch": 17.572413793103447, "grad_norm": 0.7552271485328674, "learning_rate": 2.107448275862069e-05, "loss": 0.2744, "step": 5096 }, { "epoch": 17.575862068965517, "grad_norm": 1.063977599143982, "learning_rate": 2.1078620689655174e-05, "loss": 0.2613, "step": 5097 }, { "epoch": 17.579310344827586, "grad_norm": 0.8963609933853149, "learning_rate": 2.1082758620689656e-05, "loss": 0.2725, "step": 5098 }, { "epoch": 17.582758620689656, "grad_norm": 2.6415369510650635, "learning_rate": 2.1086896551724138e-05, "loss": 0.2487, "step": 5099 }, { "epoch": 17.586206896551722, "grad_norm": 1.0196926593780518, "learning_rate": 2.109103448275862e-05, "loss": 0.3017, "step": 5100 }, { "epoch": 17.589655172413792, "grad_norm": 0.9305127859115601, "learning_rate": 2.1095172413793106e-05, "loss": 0.2588, "step": 5101 }, { "epoch": 17.593103448275862, "grad_norm": 1.039580225944519, "learning_rate": 2.1099310344827588e-05, "loss": 0.2789, "step": 5102 }, { "epoch": 17.59655172413793, "grad_norm": 0.9968008995056152, "learning_rate": 2.110344827586207e-05, "loss": 0.2985, "step": 5103 }, { "epoch": 17.6, "grad_norm": 1.4856939315795898, "learning_rate": 2.110758620689655e-05, "loss": 0.3273, "step": 5104 }, { "epoch": 17.603448275862068, "grad_norm": 1.6350394487380981, "learning_rate": 2.1111724137931037e-05, "loss": 0.4233, "step": 5105 }, { "epoch": 17.606896551724137, "grad_norm": 0.7771427035331726, "learning_rate": 2.111586206896552e-05, "loss": 0.3273, "step": 5106 }, { "epoch": 17.610344827586207, "grad_norm": 0.7819304466247559, "learning_rate": 2.1119999999999998e-05, "loss": 0.347, "step": 5107 }, { "epoch": 17.613793103448277, "grad_norm": 0.6604264974594116, "learning_rate": 2.1124137931034483e-05, "loss": 0.2996, "step": 5108 }, { "epoch": 17.617241379310343, "grad_norm": 0.7147673964500427, "learning_rate": 2.1128275862068965e-05, "loss": 0.2795, "step": 5109 }, { "epoch": 17.620689655172413, "grad_norm": 1.3457460403442383, "learning_rate": 2.113241379310345e-05, "loss": 0.2857, "step": 5110 }, { "epoch": 17.624137931034483, "grad_norm": 0.8756190538406372, "learning_rate": 2.113655172413793e-05, "loss": 0.2611, "step": 5111 }, { "epoch": 17.627586206896552, "grad_norm": 0.9685503840446472, "learning_rate": 2.1140689655172415e-05, "loss": 0.2785, "step": 5112 }, { "epoch": 17.631034482758622, "grad_norm": 0.6930207014083862, "learning_rate": 2.1144827586206897e-05, "loss": 0.3088, "step": 5113 }, { "epoch": 17.63448275862069, "grad_norm": 0.8734753131866455, "learning_rate": 2.1148965517241382e-05, "loss": 0.2687, "step": 5114 }, { "epoch": 17.637931034482758, "grad_norm": 1.00419020652771, "learning_rate": 2.115310344827586e-05, "loss": 0.2389, "step": 5115 }, { "epoch": 17.641379310344828, "grad_norm": 0.9106219410896301, "learning_rate": 2.1157241379310346e-05, "loss": 0.2976, "step": 5116 }, { "epoch": 17.644827586206898, "grad_norm": 1.1636767387390137, "learning_rate": 2.1161379310344828e-05, "loss": 0.32, "step": 5117 }, { "epoch": 17.648275862068964, "grad_norm": 0.8784304261207581, "learning_rate": 2.1165517241379313e-05, "loss": 0.2502, "step": 5118 }, { "epoch": 17.651724137931033, "grad_norm": 0.8916454315185547, "learning_rate": 2.1169655172413792e-05, "loss": 0.3049, "step": 5119 }, { "epoch": 17.655172413793103, "grad_norm": 1.0790337324142456, "learning_rate": 2.1173793103448274e-05, "loss": 0.2932, "step": 5120 }, { "epoch": 17.658620689655173, "grad_norm": 0.9317659735679626, "learning_rate": 2.117793103448276e-05, "loss": 0.2793, "step": 5121 }, { "epoch": 17.662068965517243, "grad_norm": 1.1436790227890015, "learning_rate": 2.118206896551724e-05, "loss": 0.2608, "step": 5122 }, { "epoch": 17.66551724137931, "grad_norm": 1.0791438817977905, "learning_rate": 2.1186206896551724e-05, "loss": 0.2666, "step": 5123 }, { "epoch": 17.66896551724138, "grad_norm": 1.8802438974380493, "learning_rate": 2.1190344827586206e-05, "loss": 0.2884, "step": 5124 }, { "epoch": 17.67241379310345, "grad_norm": 1.4506573677062988, "learning_rate": 2.119448275862069e-05, "loss": 0.3092, "step": 5125 }, { "epoch": 17.675862068965518, "grad_norm": 1.2358100414276123, "learning_rate": 2.1198620689655173e-05, "loss": 0.2884, "step": 5126 }, { "epoch": 17.679310344827588, "grad_norm": 1.062738299369812, "learning_rate": 2.120275862068966e-05, "loss": 0.2799, "step": 5127 }, { "epoch": 17.682758620689654, "grad_norm": 1.0611450672149658, "learning_rate": 2.1206896551724137e-05, "loss": 0.2908, "step": 5128 }, { "epoch": 17.686206896551724, "grad_norm": 1.4085462093353271, "learning_rate": 2.1211034482758622e-05, "loss": 0.342, "step": 5129 }, { "epoch": 17.689655172413794, "grad_norm": 1.844196081161499, "learning_rate": 2.1215172413793104e-05, "loss": 0.424, "step": 5130 }, { "epoch": 17.693103448275863, "grad_norm": 0.8295010924339294, "learning_rate": 2.121931034482759e-05, "loss": 0.3434, "step": 5131 }, { "epoch": 17.69655172413793, "grad_norm": 0.7149494886398315, "learning_rate": 2.122344827586207e-05, "loss": 0.3383, "step": 5132 }, { "epoch": 17.7, "grad_norm": 0.836395263671875, "learning_rate": 2.122758620689655e-05, "loss": 0.3025, "step": 5133 }, { "epoch": 17.70344827586207, "grad_norm": 1.205655574798584, "learning_rate": 2.1231724137931036e-05, "loss": 0.2847, "step": 5134 }, { "epoch": 17.70689655172414, "grad_norm": 0.7640878558158875, "learning_rate": 2.1235862068965518e-05, "loss": 0.2704, "step": 5135 }, { "epoch": 17.71034482758621, "grad_norm": 1.2120447158813477, "learning_rate": 2.124e-05, "loss": 0.3299, "step": 5136 }, { "epoch": 17.713793103448275, "grad_norm": 1.1371138095855713, "learning_rate": 2.1244137931034482e-05, "loss": 0.3087, "step": 5137 }, { "epoch": 17.717241379310344, "grad_norm": 1.4575608968734741, "learning_rate": 2.1248275862068967e-05, "loss": 0.2654, "step": 5138 }, { "epoch": 17.720689655172414, "grad_norm": 1.0667341947555542, "learning_rate": 2.125241379310345e-05, "loss": 0.2743, "step": 5139 }, { "epoch": 17.724137931034484, "grad_norm": 0.684798538684845, "learning_rate": 2.125655172413793e-05, "loss": 0.2794, "step": 5140 }, { "epoch": 17.72758620689655, "grad_norm": 1.1581501960754395, "learning_rate": 2.1260689655172413e-05, "loss": 0.2653, "step": 5141 }, { "epoch": 17.73103448275862, "grad_norm": 0.8001710772514343, "learning_rate": 2.12648275862069e-05, "loss": 0.283, "step": 5142 }, { "epoch": 17.73448275862069, "grad_norm": 0.7555004358291626, "learning_rate": 2.126896551724138e-05, "loss": 0.2473, "step": 5143 }, { "epoch": 17.73793103448276, "grad_norm": 0.9844019412994385, "learning_rate": 2.1273103448275863e-05, "loss": 0.2418, "step": 5144 }, { "epoch": 17.74137931034483, "grad_norm": Infinity, "learning_rate": 2.1273103448275863e-05, "loss": 0.2889, "step": 5145 }, { "epoch": 17.744827586206895, "grad_norm": 0.7686320543289185, "learning_rate": 2.1277241379310345e-05, "loss": 0.2403, "step": 5146 }, { "epoch": 17.748275862068965, "grad_norm": 1.4658528566360474, "learning_rate": 2.1281379310344827e-05, "loss": 0.2868, "step": 5147 }, { "epoch": 17.751724137931035, "grad_norm": 0.8266062140464783, "learning_rate": 2.1285517241379312e-05, "loss": 0.266, "step": 5148 }, { "epoch": 17.755172413793105, "grad_norm": 0.7866661548614502, "learning_rate": 2.128965517241379e-05, "loss": 0.319, "step": 5149 }, { "epoch": 17.75862068965517, "grad_norm": 1.6691874265670776, "learning_rate": 2.1293793103448276e-05, "loss": 0.3224, "step": 5150 }, { "epoch": 17.76206896551724, "grad_norm": 0.8895465731620789, "learning_rate": 2.129793103448276e-05, "loss": 0.3323, "step": 5151 }, { "epoch": 17.76551724137931, "grad_norm": 0.9826098680496216, "learning_rate": 2.1302068965517244e-05, "loss": 0.2886, "step": 5152 }, { "epoch": 17.76896551724138, "grad_norm": 1.0482252836227417, "learning_rate": 2.1306206896551722e-05, "loss": 0.2853, "step": 5153 }, { "epoch": 17.77241379310345, "grad_norm": 2.0099103450775146, "learning_rate": 2.1310344827586208e-05, "loss": 0.2971, "step": 5154 }, { "epoch": 17.775862068965516, "grad_norm": 1.6210253238677979, "learning_rate": 2.131448275862069e-05, "loss": 0.4096, "step": 5155 }, { "epoch": 17.779310344827586, "grad_norm": 0.8994912505149841, "learning_rate": 2.1318620689655175e-05, "loss": 0.3873, "step": 5156 }, { "epoch": 17.782758620689656, "grad_norm": 0.6583547592163086, "learning_rate": 2.1322758620689654e-05, "loss": 0.2948, "step": 5157 }, { "epoch": 17.786206896551725, "grad_norm": 0.8200904726982117, "learning_rate": 2.132689655172414e-05, "loss": 0.3561, "step": 5158 }, { "epoch": 17.78965517241379, "grad_norm": 0.9676698446273804, "learning_rate": 2.133103448275862e-05, "loss": 0.29, "step": 5159 }, { "epoch": 17.79310344827586, "grad_norm": 0.8435413837432861, "learning_rate": 2.1335172413793103e-05, "loss": 0.2988, "step": 5160 }, { "epoch": 17.79655172413793, "grad_norm": 2.2730164527893066, "learning_rate": 2.133931034482759e-05, "loss": 0.2978, "step": 5161 }, { "epoch": 17.8, "grad_norm": 0.901394784450531, "learning_rate": 2.1343448275862067e-05, "loss": 0.2959, "step": 5162 }, { "epoch": 17.80344827586207, "grad_norm": 0.9342544078826904, "learning_rate": 2.1347586206896553e-05, "loss": 0.2774, "step": 5163 }, { "epoch": 17.806896551724137, "grad_norm": 1.2817937135696411, "learning_rate": 2.1351724137931035e-05, "loss": 0.2663, "step": 5164 }, { "epoch": 17.810344827586206, "grad_norm": 0.8015161156654358, "learning_rate": 2.135586206896552e-05, "loss": 0.2628, "step": 5165 }, { "epoch": 17.813793103448276, "grad_norm": 1.4145238399505615, "learning_rate": 2.136e-05, "loss": 0.2511, "step": 5166 }, { "epoch": 17.817241379310346, "grad_norm": 0.88051438331604, "learning_rate": 2.1364137931034484e-05, "loss": 0.3261, "step": 5167 }, { "epoch": 17.820689655172412, "grad_norm": 0.7910743355751038, "learning_rate": 2.1368275862068966e-05, "loss": 0.2888, "step": 5168 }, { "epoch": 17.824137931034482, "grad_norm": 0.8307827115058899, "learning_rate": 2.137241379310345e-05, "loss": 0.2866, "step": 5169 }, { "epoch": 17.82758620689655, "grad_norm": 1.1873284578323364, "learning_rate": 2.137655172413793e-05, "loss": 0.2607, "step": 5170 }, { "epoch": 17.83103448275862, "grad_norm": 1.0700066089630127, "learning_rate": 2.1380689655172416e-05, "loss": 0.301, "step": 5171 }, { "epoch": 17.83448275862069, "grad_norm": 1.0313972234725952, "learning_rate": 2.1384827586206898e-05, "loss": 0.2784, "step": 5172 }, { "epoch": 17.837931034482757, "grad_norm": 0.6845630407333374, "learning_rate": 2.138896551724138e-05, "loss": 0.2684, "step": 5173 }, { "epoch": 17.841379310344827, "grad_norm": 0.6601988077163696, "learning_rate": 2.1393103448275862e-05, "loss": 0.2777, "step": 5174 }, { "epoch": 17.844827586206897, "grad_norm": 1.866377353668213, "learning_rate": 2.1397241379310344e-05, "loss": 0.2863, "step": 5175 }, { "epoch": 17.848275862068967, "grad_norm": 1.287312626838684, "learning_rate": 2.140137931034483e-05, "loss": 0.3074, "step": 5176 }, { "epoch": 17.851724137931036, "grad_norm": 1.174477458000183, "learning_rate": 2.140551724137931e-05, "loss": 0.2761, "step": 5177 }, { "epoch": 17.855172413793102, "grad_norm": 0.9786810874938965, "learning_rate": 2.1409655172413793e-05, "loss": 0.3108, "step": 5178 }, { "epoch": 17.858620689655172, "grad_norm": 3.5649778842926025, "learning_rate": 2.1413793103448275e-05, "loss": 0.3728, "step": 5179 }, { "epoch": 17.862068965517242, "grad_norm": 2.7491767406463623, "learning_rate": 2.141793103448276e-05, "loss": 0.4552, "step": 5180 }, { "epoch": 17.86551724137931, "grad_norm": 0.724307119846344, "learning_rate": 2.1422068965517243e-05, "loss": 0.3745, "step": 5181 }, { "epoch": 17.868965517241378, "grad_norm": 0.8073214292526245, "learning_rate": 2.1426206896551725e-05, "loss": 0.2928, "step": 5182 }, { "epoch": 17.872413793103448, "grad_norm": 0.7951012849807739, "learning_rate": 2.1430344827586207e-05, "loss": 0.331, "step": 5183 }, { "epoch": 17.875862068965517, "grad_norm": 1.0434423685073853, "learning_rate": 2.1434482758620692e-05, "loss": 0.2876, "step": 5184 }, { "epoch": 17.879310344827587, "grad_norm": 0.7660359144210815, "learning_rate": 2.1438620689655174e-05, "loss": 0.2899, "step": 5185 }, { "epoch": 17.882758620689657, "grad_norm": 1.0242514610290527, "learning_rate": 2.1442758620689653e-05, "loss": 0.2895, "step": 5186 }, { "epoch": 17.886206896551723, "grad_norm": 0.6644101738929749, "learning_rate": 2.1446896551724138e-05, "loss": 0.2952, "step": 5187 }, { "epoch": 17.889655172413793, "grad_norm": 0.7075839042663574, "learning_rate": 2.145103448275862e-05, "loss": 0.3341, "step": 5188 }, { "epoch": 17.893103448275863, "grad_norm": 0.7157542109489441, "learning_rate": 2.1455172413793106e-05, "loss": 0.2943, "step": 5189 }, { "epoch": 17.896551724137932, "grad_norm": 0.6837965846061707, "learning_rate": 2.1459310344827584e-05, "loss": 0.2962, "step": 5190 }, { "epoch": 17.9, "grad_norm": 1.8148136138916016, "learning_rate": 2.146344827586207e-05, "loss": 0.283, "step": 5191 }, { "epoch": 17.90344827586207, "grad_norm": 0.8882818222045898, "learning_rate": 2.146758620689655e-05, "loss": 0.2854, "step": 5192 }, { "epoch": 17.906896551724138, "grad_norm": 1.6296570301055908, "learning_rate": 2.1471724137931037e-05, "loss": 0.2556, "step": 5193 }, { "epoch": 17.910344827586208, "grad_norm": 1.3492441177368164, "learning_rate": 2.147586206896552e-05, "loss": 0.2762, "step": 5194 }, { "epoch": 17.913793103448278, "grad_norm": 0.9653624892234802, "learning_rate": 2.148e-05, "loss": 0.2893, "step": 5195 }, { "epoch": 17.917241379310344, "grad_norm": 0.8990866541862488, "learning_rate": 2.1484137931034483e-05, "loss": 0.262, "step": 5196 }, { "epoch": 17.920689655172414, "grad_norm": 0.9408113360404968, "learning_rate": 2.148827586206897e-05, "loss": 0.2694, "step": 5197 }, { "epoch": 17.924137931034483, "grad_norm": 0.9820714592933655, "learning_rate": 2.149241379310345e-05, "loss": 0.2678, "step": 5198 }, { "epoch": 17.927586206896553, "grad_norm": 0.846095621585846, "learning_rate": 2.149655172413793e-05, "loss": 0.288, "step": 5199 }, { "epoch": 17.93103448275862, "grad_norm": 1.0233663320541382, "learning_rate": 2.1500689655172414e-05, "loss": 0.2848, "step": 5200 }, { "epoch": 17.93448275862069, "grad_norm": 1.0504509210586548, "learning_rate": 2.1504827586206897e-05, "loss": 0.2912, "step": 5201 }, { "epoch": 17.93793103448276, "grad_norm": 0.8064795136451721, "learning_rate": 2.1508965517241382e-05, "loss": 0.2546, "step": 5202 }, { "epoch": 17.94137931034483, "grad_norm": 0.9776341915130615, "learning_rate": 2.151310344827586e-05, "loss": 0.2946, "step": 5203 }, { "epoch": 17.944827586206898, "grad_norm": 1.407076358795166, "learning_rate": 2.1517241379310346e-05, "loss": 0.3215, "step": 5204 }, { "epoch": 17.948275862068964, "grad_norm": 1.5900959968566895, "learning_rate": 2.1521379310344828e-05, "loss": 0.4289, "step": 5205 }, { "epoch": 17.951724137931034, "grad_norm": 0.7441406846046448, "learning_rate": 2.1525517241379313e-05, "loss": 0.3267, "step": 5206 }, { "epoch": 17.955172413793104, "grad_norm": 0.5890430808067322, "learning_rate": 2.1529655172413792e-05, "loss": 0.3127, "step": 5207 }, { "epoch": 17.958620689655174, "grad_norm": 0.540470540523529, "learning_rate": 2.1533793103448277e-05, "loss": 0.2894, "step": 5208 }, { "epoch": 17.96206896551724, "grad_norm": 0.6636950969696045, "learning_rate": 2.153793103448276e-05, "loss": 0.2998, "step": 5209 }, { "epoch": 17.96551724137931, "grad_norm": 0.9210371375083923, "learning_rate": 2.154206896551724e-05, "loss": 0.2772, "step": 5210 }, { "epoch": 17.96896551724138, "grad_norm": 0.6528672575950623, "learning_rate": 2.1546206896551723e-05, "loss": 0.2625, "step": 5211 }, { "epoch": 17.97241379310345, "grad_norm": 0.9010472893714905, "learning_rate": 2.1550344827586205e-05, "loss": 0.3177, "step": 5212 }, { "epoch": 17.97586206896552, "grad_norm": 1.1195743083953857, "learning_rate": 2.155448275862069e-05, "loss": 0.323, "step": 5213 }, { "epoch": 17.979310344827585, "grad_norm": 0.7879833579063416, "learning_rate": 2.1558620689655173e-05, "loss": 0.3221, "step": 5214 }, { "epoch": 17.982758620689655, "grad_norm": 0.9712617993354797, "learning_rate": 2.1562758620689655e-05, "loss": 0.2592, "step": 5215 }, { "epoch": 17.986206896551725, "grad_norm": 1.0319663286209106, "learning_rate": 2.1566896551724137e-05, "loss": 0.2795, "step": 5216 }, { "epoch": 17.989655172413794, "grad_norm": 1.2896324396133423, "learning_rate": 2.1571034482758622e-05, "loss": 0.2658, "step": 5217 }, { "epoch": 17.99310344827586, "grad_norm": 1.6372233629226685, "learning_rate": 2.1575172413793104e-05, "loss": 0.2707, "step": 5218 }, { "epoch": 17.99655172413793, "grad_norm": 2.1524269580841064, "learning_rate": 2.1579310344827586e-05, "loss": 0.2852, "step": 5219 }, { "epoch": 18.0, "grad_norm": 3.235083818435669, "learning_rate": 2.158344827586207e-05, "loss": 0.3371, "step": 5220 }, { "epoch": 18.00344827586207, "grad_norm": 0.725227415561676, "learning_rate": 2.1587586206896554e-05, "loss": 0.3394, "step": 5221 }, { "epoch": 18.00689655172414, "grad_norm": 0.9338841438293457, "learning_rate": 2.1591724137931036e-05, "loss": 0.3175, "step": 5222 }, { "epoch": 18.010344827586206, "grad_norm": 0.6078504323959351, "learning_rate": 2.1595862068965518e-05, "loss": 0.2766, "step": 5223 }, { "epoch": 18.013793103448275, "grad_norm": 0.973746120929718, "learning_rate": 2.16e-05, "loss": 0.2931, "step": 5224 }, { "epoch": 18.017241379310345, "grad_norm": 0.6969736814498901, "learning_rate": 2.1604137931034482e-05, "loss": 0.2576, "step": 5225 }, { "epoch": 18.020689655172415, "grad_norm": 0.6336695551872253, "learning_rate": 2.1608275862068967e-05, "loss": 0.2863, "step": 5226 }, { "epoch": 18.02413793103448, "grad_norm": 1.1023646593093872, "learning_rate": 2.161241379310345e-05, "loss": 0.3092, "step": 5227 }, { "epoch": 18.02758620689655, "grad_norm": 2.1902408599853516, "learning_rate": 2.161655172413793e-05, "loss": 0.3086, "step": 5228 }, { "epoch": 18.03103448275862, "grad_norm": 0.6957425475120544, "learning_rate": 2.1620689655172413e-05, "loss": 0.265, "step": 5229 }, { "epoch": 18.03448275862069, "grad_norm": 0.7741485834121704, "learning_rate": 2.16248275862069e-05, "loss": 0.2751, "step": 5230 }, { "epoch": 18.03793103448276, "grad_norm": 0.8266746997833252, "learning_rate": 2.162896551724138e-05, "loss": 0.3179, "step": 5231 }, { "epoch": 18.041379310344826, "grad_norm": 0.5210701823234558, "learning_rate": 2.1633103448275863e-05, "loss": 0.2819, "step": 5232 }, { "epoch": 18.044827586206896, "grad_norm": 0.8013619184494019, "learning_rate": 2.1637241379310345e-05, "loss": 0.2428, "step": 5233 }, { "epoch": 18.048275862068966, "grad_norm": 1.2670196294784546, "learning_rate": 2.164137931034483e-05, "loss": 0.2859, "step": 5234 }, { "epoch": 18.051724137931036, "grad_norm": 0.8015270233154297, "learning_rate": 2.1645517241379312e-05, "loss": 0.2751, "step": 5235 }, { "epoch": 18.055172413793102, "grad_norm": 0.7045295834541321, "learning_rate": 2.164965517241379e-05, "loss": 0.2461, "step": 5236 }, { "epoch": 18.05862068965517, "grad_norm": 0.9250706434249878, "learning_rate": 2.1653793103448276e-05, "loss": 0.2582, "step": 5237 }, { "epoch": 18.06206896551724, "grad_norm": 0.8304339647293091, "learning_rate": 2.1657931034482758e-05, "loss": 0.2784, "step": 5238 }, { "epoch": 18.06551724137931, "grad_norm": 1.2104698419570923, "learning_rate": 2.1662068965517244e-05, "loss": 0.2706, "step": 5239 }, { "epoch": 18.06896551724138, "grad_norm": 2.836655378341675, "learning_rate": 2.1666206896551722e-05, "loss": 0.2683, "step": 5240 }, { "epoch": 18.072413793103447, "grad_norm": 0.873693585395813, "learning_rate": 2.1670344827586208e-05, "loss": 0.2711, "step": 5241 }, { "epoch": 18.075862068965517, "grad_norm": 1.0486719608306885, "learning_rate": 2.167448275862069e-05, "loss": 0.2543, "step": 5242 }, { "epoch": 18.079310344827586, "grad_norm": 0.9850393533706665, "learning_rate": 2.1678620689655175e-05, "loss": 0.2526, "step": 5243 }, { "epoch": 18.082758620689656, "grad_norm": 1.3299866914749146, "learning_rate": 2.1682758620689654e-05, "loss": 0.3378, "step": 5244 }, { "epoch": 18.086206896551722, "grad_norm": 2.0741288661956787, "learning_rate": 2.168689655172414e-05, "loss": 0.4353, "step": 5245 }, { "epoch": 18.089655172413792, "grad_norm": 0.6978784203529358, "learning_rate": 2.169103448275862e-05, "loss": 0.3768, "step": 5246 }, { "epoch": 18.093103448275862, "grad_norm": 0.8531368970870972, "learning_rate": 2.1695172413793107e-05, "loss": 0.2689, "step": 5247 }, { "epoch": 18.09655172413793, "grad_norm": 0.603246808052063, "learning_rate": 2.1699310344827585e-05, "loss": 0.2788, "step": 5248 }, { "epoch": 18.1, "grad_norm": 0.5717958211898804, "learning_rate": 2.1703448275862067e-05, "loss": 0.2988, "step": 5249 }, { "epoch": 18.103448275862068, "grad_norm": 0.5432345867156982, "learning_rate": 2.1707586206896553e-05, "loss": 0.284, "step": 5250 }, { "epoch": 18.106896551724137, "grad_norm": 0.9595227837562561, "learning_rate": 2.1711724137931035e-05, "loss": 0.2959, "step": 5251 }, { "epoch": 18.110344827586207, "grad_norm": 1.4645363092422485, "learning_rate": 2.1715862068965517e-05, "loss": 0.2861, "step": 5252 }, { "epoch": 18.113793103448277, "grad_norm": 0.6796119213104248, "learning_rate": 2.172e-05, "loss": 0.2935, "step": 5253 }, { "epoch": 18.117241379310343, "grad_norm": 1.74441397190094, "learning_rate": 2.1724137931034484e-05, "loss": 0.2764, "step": 5254 }, { "epoch": 18.120689655172413, "grad_norm": 0.7550939321517944, "learning_rate": 2.1728275862068966e-05, "loss": 0.2618, "step": 5255 }, { "epoch": 18.124137931034483, "grad_norm": 1.0192413330078125, "learning_rate": 2.1732413793103448e-05, "loss": 0.2836, "step": 5256 }, { "epoch": 18.127586206896552, "grad_norm": 0.842319905757904, "learning_rate": 2.173655172413793e-05, "loss": 0.305, "step": 5257 }, { "epoch": 18.131034482758622, "grad_norm": 0.8178362250328064, "learning_rate": 2.1740689655172416e-05, "loss": 0.2745, "step": 5258 }, { "epoch": 18.13448275862069, "grad_norm": 1.2013177871704102, "learning_rate": 2.1744827586206898e-05, "loss": 0.2813, "step": 5259 }, { "epoch": 18.137931034482758, "grad_norm": 2.958836793899536, "learning_rate": 2.1748965517241383e-05, "loss": 0.2354, "step": 5260 }, { "epoch": 18.141379310344828, "grad_norm": 0.853506326675415, "learning_rate": 2.175310344827586e-05, "loss": 0.2587, "step": 5261 }, { "epoch": 18.144827586206898, "grad_norm": 0.7613204121589661, "learning_rate": 2.1757241379310344e-05, "loss": 0.2572, "step": 5262 }, { "epoch": 18.148275862068967, "grad_norm": 1.016116738319397, "learning_rate": 2.176137931034483e-05, "loss": 0.2605, "step": 5263 }, { "epoch": 18.151724137931033, "grad_norm": 0.6768178939819336, "learning_rate": 2.176551724137931e-05, "loss": 0.2428, "step": 5264 }, { "epoch": 18.155172413793103, "grad_norm": 1.3933703899383545, "learning_rate": 2.1769655172413793e-05, "loss": 0.2786, "step": 5265 }, { "epoch": 18.158620689655173, "grad_norm": 1.721091866493225, "learning_rate": 2.1773793103448275e-05, "loss": 0.2778, "step": 5266 }, { "epoch": 18.162068965517243, "grad_norm": 1.8028078079223633, "learning_rate": 2.177793103448276e-05, "loss": 0.2548, "step": 5267 }, { "epoch": 18.16551724137931, "grad_norm": 1.051818609237671, "learning_rate": 2.1782068965517243e-05, "loss": 0.2683, "step": 5268 }, { "epoch": 18.16896551724138, "grad_norm": 0.989807665348053, "learning_rate": 2.1786206896551725e-05, "loss": 0.2722, "step": 5269 }, { "epoch": 18.17241379310345, "grad_norm": 1.5387790203094482, "learning_rate": 2.1790344827586207e-05, "loss": 0.4306, "step": 5270 }, { "epoch": 18.175862068965518, "grad_norm": 0.901849627494812, "learning_rate": 2.1794482758620692e-05, "loss": 0.3151, "step": 5271 }, { "epoch": 18.179310344827588, "grad_norm": 0.6746479868888855, "learning_rate": 2.1798620689655174e-05, "loss": 0.3193, "step": 5272 }, { "epoch": 18.182758620689654, "grad_norm": 0.802870512008667, "learning_rate": 2.1802758620689656e-05, "loss": 0.2932, "step": 5273 }, { "epoch": 18.186206896551724, "grad_norm": 0.5822077989578247, "learning_rate": 2.1806896551724138e-05, "loss": 0.2612, "step": 5274 }, { "epoch": 18.189655172413794, "grad_norm": 1.1616973876953125, "learning_rate": 2.181103448275862e-05, "loss": 0.2911, "step": 5275 }, { "epoch": 18.193103448275863, "grad_norm": 0.8027050495147705, "learning_rate": 2.1815172413793105e-05, "loss": 0.2725, "step": 5276 }, { "epoch": 18.19655172413793, "grad_norm": 0.9995896220207214, "learning_rate": 2.1819310344827584e-05, "loss": 0.2721, "step": 5277 }, { "epoch": 18.2, "grad_norm": 0.7621623873710632, "learning_rate": 2.182344827586207e-05, "loss": 0.2733, "step": 5278 }, { "epoch": 18.20344827586207, "grad_norm": 0.8455886840820312, "learning_rate": 2.182758620689655e-05, "loss": 0.2475, "step": 5279 }, { "epoch": 18.20689655172414, "grad_norm": 0.8178327679634094, "learning_rate": 2.1831724137931037e-05, "loss": 0.2683, "step": 5280 }, { "epoch": 18.21034482758621, "grad_norm": 0.7684647440910339, "learning_rate": 2.1835862068965516e-05, "loss": 0.3049, "step": 5281 }, { "epoch": 18.213793103448275, "grad_norm": 0.7506880164146423, "learning_rate": 2.184e-05, "loss": 0.2721, "step": 5282 }, { "epoch": 18.217241379310344, "grad_norm": 0.9366299510002136, "learning_rate": 2.1844137931034483e-05, "loss": 0.2575, "step": 5283 }, { "epoch": 18.220689655172414, "grad_norm": 1.032681941986084, "learning_rate": 2.184827586206897e-05, "loss": 0.2717, "step": 5284 }, { "epoch": 18.224137931034484, "grad_norm": 1.1227396726608276, "learning_rate": 2.1852413793103447e-05, "loss": 0.281, "step": 5285 }, { "epoch": 18.22758620689655, "grad_norm": 1.1289019584655762, "learning_rate": 2.1856551724137932e-05, "loss": 0.2575, "step": 5286 }, { "epoch": 18.23103448275862, "grad_norm": 1.5502938032150269, "learning_rate": 2.1860689655172414e-05, "loss": 0.2868, "step": 5287 }, { "epoch": 18.23448275862069, "grad_norm": 0.7734251618385315, "learning_rate": 2.1864827586206896e-05, "loss": 0.29, "step": 5288 }, { "epoch": 18.23793103448276, "grad_norm": 0.7877610325813293, "learning_rate": 2.186896551724138e-05, "loss": 0.272, "step": 5289 }, { "epoch": 18.24137931034483, "grad_norm": 0.9840954542160034, "learning_rate": 2.187310344827586e-05, "loss": 0.2828, "step": 5290 }, { "epoch": 18.244827586206895, "grad_norm": 0.7185224890708923, "learning_rate": 2.1877241379310346e-05, "loss": 0.2688, "step": 5291 }, { "epoch": 18.248275862068965, "grad_norm": 0.7280076742172241, "learning_rate": 2.1881379310344828e-05, "loss": 0.2503, "step": 5292 }, { "epoch": 18.251724137931035, "grad_norm": 1.202465534210205, "learning_rate": 2.1885517241379313e-05, "loss": 0.3097, "step": 5293 }, { "epoch": 18.255172413793105, "grad_norm": 1.443971037864685, "learning_rate": 2.1889655172413792e-05, "loss": 0.3307, "step": 5294 }, { "epoch": 18.25862068965517, "grad_norm": 1.4382380247116089, "learning_rate": 2.1893793103448277e-05, "loss": 0.3715, "step": 5295 }, { "epoch": 18.26206896551724, "grad_norm": 0.8070908188819885, "learning_rate": 2.189793103448276e-05, "loss": 0.3334, "step": 5296 }, { "epoch": 18.26551724137931, "grad_norm": 0.6647459864616394, "learning_rate": 2.1902068965517245e-05, "loss": 0.3077, "step": 5297 }, { "epoch": 18.26896551724138, "grad_norm": 0.7114371061325073, "learning_rate": 2.1906206896551723e-05, "loss": 0.2719, "step": 5298 }, { "epoch": 18.27241379310345, "grad_norm": 0.7529367208480835, "learning_rate": 2.191034482758621e-05, "loss": 0.3046, "step": 5299 }, { "epoch": 18.275862068965516, "grad_norm": 0.6939551830291748, "learning_rate": 2.191448275862069e-05, "loss": 0.2873, "step": 5300 }, { "epoch": 18.279310344827586, "grad_norm": 0.8459286689758301, "learning_rate": 2.1918620689655173e-05, "loss": 0.2628, "step": 5301 }, { "epoch": 18.282758620689656, "grad_norm": 0.9060184359550476, "learning_rate": 2.1922758620689655e-05, "loss": 0.3056, "step": 5302 }, { "epoch": 18.286206896551725, "grad_norm": 0.7329578399658203, "learning_rate": 2.1926896551724137e-05, "loss": 0.2949, "step": 5303 }, { "epoch": 18.28965517241379, "grad_norm": 0.7789405584335327, "learning_rate": 2.1931034482758622e-05, "loss": 0.2589, "step": 5304 }, { "epoch": 18.29310344827586, "grad_norm": 0.9538295865058899, "learning_rate": 2.1935172413793104e-05, "loss": 0.2839, "step": 5305 }, { "epoch": 18.29655172413793, "grad_norm": 1.065955400466919, "learning_rate": 2.1939310344827586e-05, "loss": 0.2957, "step": 5306 }, { "epoch": 18.3, "grad_norm": 0.9895902276039124, "learning_rate": 2.194344827586207e-05, "loss": 0.305, "step": 5307 }, { "epoch": 18.30344827586207, "grad_norm": 0.7466364502906799, "learning_rate": 2.1947586206896554e-05, "loss": 0.2736, "step": 5308 }, { "epoch": 18.306896551724137, "grad_norm": 0.9849151372909546, "learning_rate": 2.1951724137931036e-05, "loss": 0.2974, "step": 5309 }, { "epoch": 18.310344827586206, "grad_norm": 1.041496992111206, "learning_rate": 2.1955862068965518e-05, "loss": 0.256, "step": 5310 }, { "epoch": 18.313793103448276, "grad_norm": 0.9358316659927368, "learning_rate": 2.196e-05, "loss": 0.2834, "step": 5311 }, { "epoch": 18.317241379310346, "grad_norm": 1.105467438697815, "learning_rate": 2.1964137931034485e-05, "loss": 0.2811, "step": 5312 }, { "epoch": 18.320689655172412, "grad_norm": 0.9347025156021118, "learning_rate": 2.1968275862068967e-05, "loss": 0.3141, "step": 5313 }, { "epoch": 18.324137931034482, "grad_norm": 0.9205333590507507, "learning_rate": 2.1972413793103446e-05, "loss": 0.2692, "step": 5314 }, { "epoch": 18.32758620689655, "grad_norm": 1.0850920677185059, "learning_rate": 2.197655172413793e-05, "loss": 0.2726, "step": 5315 }, { "epoch": 18.33103448275862, "grad_norm": 1.1639646291732788, "learning_rate": 2.1980689655172413e-05, "loss": 0.3225, "step": 5316 }, { "epoch": 18.33448275862069, "grad_norm": 1.0421984195709229, "learning_rate": 2.19848275862069e-05, "loss": 0.2766, "step": 5317 }, { "epoch": 18.337931034482757, "grad_norm": 1.1726921796798706, "learning_rate": 2.1988965517241377e-05, "loss": 0.3051, "step": 5318 }, { "epoch": 18.341379310344827, "grad_norm": 1.17181396484375, "learning_rate": 2.1993103448275863e-05, "loss": 0.3072, "step": 5319 }, { "epoch": 18.344827586206897, "grad_norm": 2.4741122722625732, "learning_rate": 2.1997241379310345e-05, "loss": 0.3889, "step": 5320 }, { "epoch": 18.348275862068967, "grad_norm": 0.7374497652053833, "learning_rate": 2.200137931034483e-05, "loss": 0.3303, "step": 5321 }, { "epoch": 18.351724137931033, "grad_norm": 0.8633158206939697, "learning_rate": 2.200551724137931e-05, "loss": 0.2939, "step": 5322 }, { "epoch": 18.355172413793102, "grad_norm": 0.6387122869491577, "learning_rate": 2.2009655172413794e-05, "loss": 0.2871, "step": 5323 }, { "epoch": 18.358620689655172, "grad_norm": 0.8980454802513123, "learning_rate": 2.2013793103448276e-05, "loss": 0.2932, "step": 5324 }, { "epoch": 18.362068965517242, "grad_norm": 0.8653892278671265, "learning_rate": 2.201793103448276e-05, "loss": 0.2874, "step": 5325 }, { "epoch": 18.36551724137931, "grad_norm": 0.7079808712005615, "learning_rate": 2.2022068965517244e-05, "loss": 0.295, "step": 5326 }, { "epoch": 18.368965517241378, "grad_norm": 1.3864558935165405, "learning_rate": 2.2026206896551722e-05, "loss": 0.2528, "step": 5327 }, { "epoch": 18.372413793103448, "grad_norm": 0.5889486074447632, "learning_rate": 2.2030344827586208e-05, "loss": 0.2597, "step": 5328 }, { "epoch": 18.375862068965517, "grad_norm": 0.8524258732795715, "learning_rate": 2.203448275862069e-05, "loss": 0.2517, "step": 5329 }, { "epoch": 18.379310344827587, "grad_norm": 0.9832419157028198, "learning_rate": 2.2038620689655175e-05, "loss": 0.284, "step": 5330 }, { "epoch": 18.382758620689657, "grad_norm": 0.8994970917701721, "learning_rate": 2.2042758620689654e-05, "loss": 0.2625, "step": 5331 }, { "epoch": 18.386206896551723, "grad_norm": 0.828851580619812, "learning_rate": 2.204689655172414e-05, "loss": 0.2449, "step": 5332 }, { "epoch": 18.389655172413793, "grad_norm": 1.1948931217193604, "learning_rate": 2.205103448275862e-05, "loss": 0.2732, "step": 5333 }, { "epoch": 18.393103448275863, "grad_norm": 0.8109039068222046, "learning_rate": 2.2055172413793107e-05, "loss": 0.2631, "step": 5334 }, { "epoch": 18.396551724137932, "grad_norm": 0.8373278379440308, "learning_rate": 2.2059310344827585e-05, "loss": 0.279, "step": 5335 }, { "epoch": 18.4, "grad_norm": 0.661611020565033, "learning_rate": 2.206344827586207e-05, "loss": 0.2649, "step": 5336 }, { "epoch": 18.40344827586207, "grad_norm": 1.5309916734695435, "learning_rate": 2.2067586206896553e-05, "loss": 0.2827, "step": 5337 }, { "epoch": 18.406896551724138, "grad_norm": 0.9394921660423279, "learning_rate": 2.2071724137931038e-05, "loss": 0.2427, "step": 5338 }, { "epoch": 18.410344827586208, "grad_norm": 0.9433667063713074, "learning_rate": 2.2075862068965517e-05, "loss": 0.2651, "step": 5339 }, { "epoch": 18.413793103448278, "grad_norm": 1.445773959159851, "learning_rate": 2.208e-05, "loss": 0.2384, "step": 5340 }, { "epoch": 18.417241379310344, "grad_norm": 0.9377464652061462, "learning_rate": 2.2084137931034484e-05, "loss": 0.2721, "step": 5341 }, { "epoch": 18.420689655172414, "grad_norm": 1.5058785676956177, "learning_rate": 2.2088275862068966e-05, "loss": 0.3025, "step": 5342 }, { "epoch": 18.424137931034483, "grad_norm": 2.0354843139648438, "learning_rate": 2.2092413793103448e-05, "loss": 0.2622, "step": 5343 }, { "epoch": 18.427586206896553, "grad_norm": 1.747151494026184, "learning_rate": 2.209655172413793e-05, "loss": 0.3147, "step": 5344 }, { "epoch": 18.43103448275862, "grad_norm": 2.549170970916748, "learning_rate": 2.2100689655172416e-05, "loss": 0.3475, "step": 5345 }, { "epoch": 18.43448275862069, "grad_norm": 0.804081380367279, "learning_rate": 2.2104827586206898e-05, "loss": 0.3312, "step": 5346 }, { "epoch": 18.43793103448276, "grad_norm": 0.73294597864151, "learning_rate": 2.210896551724138e-05, "loss": 0.2664, "step": 5347 }, { "epoch": 18.44137931034483, "grad_norm": 0.5508712530136108, "learning_rate": 2.211310344827586e-05, "loss": 0.2919, "step": 5348 }, { "epoch": 18.444827586206898, "grad_norm": 1.9861912727355957, "learning_rate": 2.2117241379310347e-05, "loss": 0.2806, "step": 5349 }, { "epoch": 18.448275862068964, "grad_norm": 0.8558562397956848, "learning_rate": 2.212137931034483e-05, "loss": 0.2718, "step": 5350 }, { "epoch": 18.451724137931034, "grad_norm": 1.3840571641921997, "learning_rate": 2.212551724137931e-05, "loss": 0.2617, "step": 5351 }, { "epoch": 18.455172413793104, "grad_norm": 0.8725120425224304, "learning_rate": 2.2129655172413793e-05, "loss": 0.2774, "step": 5352 }, { "epoch": 18.458620689655174, "grad_norm": 1.1723852157592773, "learning_rate": 2.2133793103448275e-05, "loss": 0.3278, "step": 5353 }, { "epoch": 18.46206896551724, "grad_norm": 0.8056606650352478, "learning_rate": 2.213793103448276e-05, "loss": 0.2871, "step": 5354 }, { "epoch": 18.46551724137931, "grad_norm": 1.1524511575698853, "learning_rate": 2.214206896551724e-05, "loss": 0.2662, "step": 5355 }, { "epoch": 18.46896551724138, "grad_norm": 0.7601949572563171, "learning_rate": 2.2146206896551725e-05, "loss": 0.2689, "step": 5356 }, { "epoch": 18.47241379310345, "grad_norm": 0.826851487159729, "learning_rate": 2.2150344827586207e-05, "loss": 0.2545, "step": 5357 }, { "epoch": 18.47586206896552, "grad_norm": 0.6195048093795776, "learning_rate": 2.2154482758620692e-05, "loss": 0.2712, "step": 5358 }, { "epoch": 18.479310344827585, "grad_norm": 1.2579333782196045, "learning_rate": 2.2158620689655174e-05, "loss": 0.2866, "step": 5359 }, { "epoch": 18.482758620689655, "grad_norm": 0.965313196182251, "learning_rate": 2.2162758620689656e-05, "loss": 0.3018, "step": 5360 }, { "epoch": 18.486206896551725, "grad_norm": 0.6949664950370789, "learning_rate": 2.2166896551724138e-05, "loss": 0.2856, "step": 5361 }, { "epoch": 18.489655172413794, "grad_norm": 0.8006588816642761, "learning_rate": 2.2171034482758623e-05, "loss": 0.2668, "step": 5362 }, { "epoch": 18.49310344827586, "grad_norm": 1.0553022623062134, "learning_rate": 2.2175172413793105e-05, "loss": 0.2579, "step": 5363 }, { "epoch": 18.49655172413793, "grad_norm": 4.162227630615234, "learning_rate": 2.2179310344827587e-05, "loss": 0.2822, "step": 5364 }, { "epoch": 18.5, "grad_norm": 6.087271213531494, "learning_rate": 2.218344827586207e-05, "loss": 0.2859, "step": 5365 }, { "epoch": 18.50344827586207, "grad_norm": 1.0522884130477905, "learning_rate": 2.218758620689655e-05, "loss": 0.3046, "step": 5366 }, { "epoch": 18.50689655172414, "grad_norm": 2.370609760284424, "learning_rate": 2.2191724137931037e-05, "loss": 0.2672, "step": 5367 }, { "epoch": 18.510344827586206, "grad_norm": 0.8621776700019836, "learning_rate": 2.2195862068965515e-05, "loss": 0.3063, "step": 5368 }, { "epoch": 18.513793103448275, "grad_norm": 2.802729845046997, "learning_rate": 2.22e-05, "loss": 0.2857, "step": 5369 }, { "epoch": 18.517241379310345, "grad_norm": 2.328603506088257, "learning_rate": 2.2204137931034483e-05, "loss": 0.3828, "step": 5370 }, { "epoch": 18.520689655172415, "grad_norm": 0.7890995144844055, "learning_rate": 2.220827586206897e-05, "loss": 0.3522, "step": 5371 }, { "epoch": 18.52413793103448, "grad_norm": 0.9185176491737366, "learning_rate": 2.2212413793103447e-05, "loss": 0.3, "step": 5372 }, { "epoch": 18.52758620689655, "grad_norm": 0.7168757319450378, "learning_rate": 2.2216551724137932e-05, "loss": 0.2934, "step": 5373 }, { "epoch": 18.53103448275862, "grad_norm": 0.8789031505584717, "learning_rate": 2.2220689655172414e-05, "loss": 0.2849, "step": 5374 }, { "epoch": 18.53448275862069, "grad_norm": 0.6052097678184509, "learning_rate": 2.22248275862069e-05, "loss": 0.3089, "step": 5375 }, { "epoch": 18.53793103448276, "grad_norm": 0.778863787651062, "learning_rate": 2.222896551724138e-05, "loss": 0.2799, "step": 5376 }, { "epoch": 18.541379310344826, "grad_norm": 0.6890180706977844, "learning_rate": 2.2233103448275864e-05, "loss": 0.2885, "step": 5377 }, { "epoch": 18.544827586206896, "grad_norm": 1.6419627666473389, "learning_rate": 2.2237241379310346e-05, "loss": 0.2765, "step": 5378 }, { "epoch": 18.548275862068966, "grad_norm": 1.6150177717208862, "learning_rate": 2.2241379310344828e-05, "loss": 0.2801, "step": 5379 }, { "epoch": 18.551724137931036, "grad_norm": 1.7834563255310059, "learning_rate": 2.224551724137931e-05, "loss": 0.3001, "step": 5380 }, { "epoch": 18.555172413793102, "grad_norm": 1.095492959022522, "learning_rate": 2.2249655172413792e-05, "loss": 0.2469, "step": 5381 }, { "epoch": 18.55862068965517, "grad_norm": 0.8162360787391663, "learning_rate": 2.2253793103448277e-05, "loss": 0.2766, "step": 5382 }, { "epoch": 18.56206896551724, "grad_norm": 0.8255149126052856, "learning_rate": 2.225793103448276e-05, "loss": 0.2727, "step": 5383 }, { "epoch": 18.56551724137931, "grad_norm": 1.7939592599868774, "learning_rate": 2.226206896551724e-05, "loss": 0.2571, "step": 5384 }, { "epoch": 18.56896551724138, "grad_norm": 0.9370687007904053, "learning_rate": 2.2266206896551723e-05, "loss": 0.2717, "step": 5385 }, { "epoch": 18.572413793103447, "grad_norm": 1.030880093574524, "learning_rate": 2.227034482758621e-05, "loss": 0.2781, "step": 5386 }, { "epoch": 18.575862068965517, "grad_norm": 1.366518497467041, "learning_rate": 2.227448275862069e-05, "loss": 0.2876, "step": 5387 }, { "epoch": 18.579310344827586, "grad_norm": 0.9107691645622253, "learning_rate": 2.2278620689655173e-05, "loss": 0.2785, "step": 5388 }, { "epoch": 18.582758620689656, "grad_norm": 0.8378852009773254, "learning_rate": 2.2282758620689655e-05, "loss": 0.2544, "step": 5389 }, { "epoch": 18.586206896551722, "grad_norm": 0.8623158931732178, "learning_rate": 2.228689655172414e-05, "loss": 0.2881, "step": 5390 }, { "epoch": 18.589655172413792, "grad_norm": 1.2657417058944702, "learning_rate": 2.2291034482758622e-05, "loss": 0.2793, "step": 5391 }, { "epoch": 18.593103448275862, "grad_norm": 1.2392243146896362, "learning_rate": 2.2295172413793104e-05, "loss": 0.2968, "step": 5392 }, { "epoch": 18.59655172413793, "grad_norm": 0.8847397565841675, "learning_rate": 2.2299310344827586e-05, "loss": 0.2756, "step": 5393 }, { "epoch": 18.6, "grad_norm": 1.7154459953308105, "learning_rate": 2.2303448275862068e-05, "loss": 0.2784, "step": 5394 }, { "epoch": 18.603448275862068, "grad_norm": 4.548151016235352, "learning_rate": 2.2307586206896554e-05, "loss": 0.4, "step": 5395 }, { "epoch": 18.606896551724137, "grad_norm": 0.7477134466171265, "learning_rate": 2.2311724137931036e-05, "loss": 0.353, "step": 5396 }, { "epoch": 18.610344827586207, "grad_norm": 0.9504026174545288, "learning_rate": 2.2315862068965518e-05, "loss": 0.3071, "step": 5397 }, { "epoch": 18.613793103448277, "grad_norm": 1.3988821506500244, "learning_rate": 2.232e-05, "loss": 0.2988, "step": 5398 }, { "epoch": 18.617241379310343, "grad_norm": 0.9210456013679504, "learning_rate": 2.2324137931034485e-05, "loss": 0.2986, "step": 5399 }, { "epoch": 18.620689655172413, "grad_norm": 0.612354576587677, "learning_rate": 2.2328275862068967e-05, "loss": 0.2539, "step": 5400 }, { "epoch": 18.624137931034483, "grad_norm": 0.7614344358444214, "learning_rate": 2.233241379310345e-05, "loss": 0.2739, "step": 5401 }, { "epoch": 18.627586206896552, "grad_norm": 0.6935577988624573, "learning_rate": 2.233655172413793e-05, "loss": 0.2922, "step": 5402 }, { "epoch": 18.631034482758622, "grad_norm": 0.6740681529045105, "learning_rate": 2.2340689655172413e-05, "loss": 0.2714, "step": 5403 }, { "epoch": 18.63448275862069, "grad_norm": 0.8176151514053345, "learning_rate": 2.23448275862069e-05, "loss": 0.2922, "step": 5404 }, { "epoch": 18.637931034482758, "grad_norm": 0.867354154586792, "learning_rate": 2.2348965517241377e-05, "loss": 0.2516, "step": 5405 }, { "epoch": 18.641379310344828, "grad_norm": 0.8482561707496643, "learning_rate": 2.2353103448275863e-05, "loss": 0.2543, "step": 5406 }, { "epoch": 18.644827586206898, "grad_norm": 1.0563756227493286, "learning_rate": 2.2357241379310345e-05, "loss": 0.2742, "step": 5407 }, { "epoch": 18.648275862068964, "grad_norm": 2.4731667041778564, "learning_rate": 2.236137931034483e-05, "loss": 0.2499, "step": 5408 }, { "epoch": 18.651724137931033, "grad_norm": 2.0579988956451416, "learning_rate": 2.236551724137931e-05, "loss": 0.2694, "step": 5409 }, { "epoch": 18.655172413793103, "grad_norm": 1.1647058725357056, "learning_rate": 2.2369655172413794e-05, "loss": 0.282, "step": 5410 }, { "epoch": 18.658620689655173, "grad_norm": 1.1146159172058105, "learning_rate": 2.2373793103448276e-05, "loss": 0.2661, "step": 5411 }, { "epoch": 18.662068965517243, "grad_norm": 2.28442645072937, "learning_rate": 2.237793103448276e-05, "loss": 0.2808, "step": 5412 }, { "epoch": 18.66551724137931, "grad_norm": 0.8141090869903564, "learning_rate": 2.238206896551724e-05, "loss": 0.2789, "step": 5413 }, { "epoch": 18.66896551724138, "grad_norm": 1.2529816627502441, "learning_rate": 2.2386206896551726e-05, "loss": 0.259, "step": 5414 }, { "epoch": 18.67241379310345, "grad_norm": 1.761046290397644, "learning_rate": 2.2390344827586208e-05, "loss": 0.275, "step": 5415 }, { "epoch": 18.675862068965518, "grad_norm": 0.787290096282959, "learning_rate": 2.239448275862069e-05, "loss": 0.2858, "step": 5416 }, { "epoch": 18.679310344827588, "grad_norm": 0.9550169110298157, "learning_rate": 2.239862068965517e-05, "loss": 0.2738, "step": 5417 }, { "epoch": 18.682758620689654, "grad_norm": 1.1088306903839111, "learning_rate": 2.2402758620689654e-05, "loss": 0.2611, "step": 5418 }, { "epoch": 18.686206896551724, "grad_norm": 2.3255577087402344, "learning_rate": 2.240689655172414e-05, "loss": 0.2975, "step": 5419 }, { "epoch": 18.689655172413794, "grad_norm": 2.1549971103668213, "learning_rate": 2.241103448275862e-05, "loss": 0.3979, "step": 5420 }, { "epoch": 18.693103448275863, "grad_norm": 0.7654474973678589, "learning_rate": 2.2415172413793103e-05, "loss": 0.3086, "step": 5421 }, { "epoch": 18.69655172413793, "grad_norm": 0.6288106441497803, "learning_rate": 2.2419310344827585e-05, "loss": 0.2751, "step": 5422 }, { "epoch": 18.7, "grad_norm": 0.9478097558021545, "learning_rate": 2.242344827586207e-05, "loss": 0.3256, "step": 5423 }, { "epoch": 18.70344827586207, "grad_norm": 0.9022949934005737, "learning_rate": 2.2427586206896553e-05, "loss": 0.2631, "step": 5424 }, { "epoch": 18.70689655172414, "grad_norm": 0.9792685508728027, "learning_rate": 2.2431724137931038e-05, "loss": 0.2881, "step": 5425 }, { "epoch": 18.71034482758621, "grad_norm": 0.8723429441452026, "learning_rate": 2.2435862068965517e-05, "loss": 0.3001, "step": 5426 }, { "epoch": 18.713793103448275, "grad_norm": 0.7261333465576172, "learning_rate": 2.2440000000000002e-05, "loss": 0.2716, "step": 5427 }, { "epoch": 18.717241379310344, "grad_norm": 1.0789155960083008, "learning_rate": 2.2444137931034484e-05, "loss": 0.2962, "step": 5428 }, { "epoch": 18.720689655172414, "grad_norm": 0.7379997372627258, "learning_rate": 2.2448275862068966e-05, "loss": 0.242, "step": 5429 }, { "epoch": 18.724137931034484, "grad_norm": 0.6057190895080566, "learning_rate": 2.2452413793103448e-05, "loss": 0.2322, "step": 5430 }, { "epoch": 18.72758620689655, "grad_norm": 1.1716034412384033, "learning_rate": 2.245655172413793e-05, "loss": 0.2735, "step": 5431 }, { "epoch": 18.73103448275862, "grad_norm": 1.567262053489685, "learning_rate": 2.2460689655172415e-05, "loss": 0.3314, "step": 5432 }, { "epoch": 18.73448275862069, "grad_norm": 1.019039511680603, "learning_rate": 2.2464827586206897e-05, "loss": 0.2437, "step": 5433 }, { "epoch": 18.73793103448276, "grad_norm": 1.3900539875030518, "learning_rate": 2.246896551724138e-05, "loss": 0.2642, "step": 5434 }, { "epoch": 18.74137931034483, "grad_norm": 0.7931538820266724, "learning_rate": 2.247310344827586e-05, "loss": 0.305, "step": 5435 }, { "epoch": 18.744827586206895, "grad_norm": 0.8643925786018372, "learning_rate": 2.2477241379310347e-05, "loss": 0.2798, "step": 5436 }, { "epoch": 18.748275862068965, "grad_norm": 0.7886937260627747, "learning_rate": 2.248137931034483e-05, "loss": 0.2637, "step": 5437 }, { "epoch": 18.751724137931035, "grad_norm": 1.021544098854065, "learning_rate": 2.248551724137931e-05, "loss": 0.2626, "step": 5438 }, { "epoch": 18.755172413793105, "grad_norm": 0.8319028615951538, "learning_rate": 2.2489655172413793e-05, "loss": 0.3184, "step": 5439 }, { "epoch": 18.75862068965517, "grad_norm": 1.919639229774475, "learning_rate": 2.249379310344828e-05, "loss": 0.2974, "step": 5440 }, { "epoch": 18.76206896551724, "grad_norm": 0.9137845635414124, "learning_rate": 2.249793103448276e-05, "loss": 0.2993, "step": 5441 }, { "epoch": 18.76551724137931, "grad_norm": 1.324806809425354, "learning_rate": 2.250206896551724e-05, "loss": 0.2577, "step": 5442 }, { "epoch": 18.76896551724138, "grad_norm": 1.6019057035446167, "learning_rate": 2.2506206896551724e-05, "loss": 0.277, "step": 5443 }, { "epoch": 18.77241379310345, "grad_norm": 1.4311597347259521, "learning_rate": 2.2510344827586206e-05, "loss": 0.3428, "step": 5444 }, { "epoch": 18.775862068965516, "grad_norm": 2.1291627883911133, "learning_rate": 2.2514482758620692e-05, "loss": 0.421, "step": 5445 }, { "epoch": 18.779310344827586, "grad_norm": 0.6323930025100708, "learning_rate": 2.251862068965517e-05, "loss": 0.3148, "step": 5446 }, { "epoch": 18.782758620689656, "grad_norm": 0.7495322823524475, "learning_rate": 2.2522758620689656e-05, "loss": 0.3191, "step": 5447 }, { "epoch": 18.786206896551725, "grad_norm": 0.6751822829246521, "learning_rate": 2.2526896551724138e-05, "loss": 0.2934, "step": 5448 }, { "epoch": 18.78965517241379, "grad_norm": 0.937419056892395, "learning_rate": 2.2531034482758623e-05, "loss": 0.2888, "step": 5449 }, { "epoch": 18.79310344827586, "grad_norm": 0.7543556690216064, "learning_rate": 2.2535172413793102e-05, "loss": 0.3048, "step": 5450 }, { "epoch": 18.79655172413793, "grad_norm": 0.845119297504425, "learning_rate": 2.2539310344827587e-05, "loss": 0.2962, "step": 5451 }, { "epoch": 18.8, "grad_norm": 0.9023021459579468, "learning_rate": 2.254344827586207e-05, "loss": 0.264, "step": 5452 }, { "epoch": 18.80344827586207, "grad_norm": 1.0652028322219849, "learning_rate": 2.2547586206896555e-05, "loss": 0.2914, "step": 5453 }, { "epoch": 18.806896551724137, "grad_norm": 0.800163209438324, "learning_rate": 2.2551724137931033e-05, "loss": 0.26, "step": 5454 }, { "epoch": 18.810344827586206, "grad_norm": 0.8589245676994324, "learning_rate": 2.2555862068965515e-05, "loss": 0.2641, "step": 5455 }, { "epoch": 18.813793103448276, "grad_norm": 0.8373687267303467, "learning_rate": 2.256e-05, "loss": 0.2769, "step": 5456 }, { "epoch": 18.817241379310346, "grad_norm": 1.0129364728927612, "learning_rate": 2.2564137931034483e-05, "loss": 0.2918, "step": 5457 }, { "epoch": 18.820689655172412, "grad_norm": 0.9238981604576111, "learning_rate": 2.2568275862068965e-05, "loss": 0.2936, "step": 5458 }, { "epoch": 18.824137931034482, "grad_norm": 0.9023559093475342, "learning_rate": 2.2572413793103447e-05, "loss": 0.2734, "step": 5459 }, { "epoch": 18.82758620689655, "grad_norm": 1.2269339561462402, "learning_rate": 2.2576551724137932e-05, "loss": 0.2681, "step": 5460 }, { "epoch": 18.83103448275862, "grad_norm": 0.9629461765289307, "learning_rate": 2.2580689655172414e-05, "loss": 0.2856, "step": 5461 }, { "epoch": 18.83448275862069, "grad_norm": 1.4104067087173462, "learning_rate": 2.25848275862069e-05, "loss": 0.2636, "step": 5462 }, { "epoch": 18.837931034482757, "grad_norm": 0.9140189290046692, "learning_rate": 2.258896551724138e-05, "loss": 0.3015, "step": 5463 }, { "epoch": 18.841379310344827, "grad_norm": 2.4267892837524414, "learning_rate": 2.2593103448275864e-05, "loss": 0.2861, "step": 5464 }, { "epoch": 18.844827586206897, "grad_norm": 1.7609548568725586, "learning_rate": 2.2597241379310346e-05, "loss": 0.2805, "step": 5465 }, { "epoch": 18.848275862068967, "grad_norm": 1.1780197620391846, "learning_rate": 2.260137931034483e-05, "loss": 0.3368, "step": 5466 }, { "epoch": 18.851724137931036, "grad_norm": 1.1339173316955566, "learning_rate": 2.260551724137931e-05, "loss": 0.2924, "step": 5467 }, { "epoch": 18.855172413793102, "grad_norm": 1.125458836555481, "learning_rate": 2.2609655172413792e-05, "loss": 0.2809, "step": 5468 }, { "epoch": 18.858620689655172, "grad_norm": 0.8789969682693481, "learning_rate": 2.2613793103448277e-05, "loss": 0.3072, "step": 5469 }, { "epoch": 18.862068965517242, "grad_norm": 1.6847026348114014, "learning_rate": 2.261793103448276e-05, "loss": 0.4125, "step": 5470 }, { "epoch": 18.86551724137931, "grad_norm": 0.9112480282783508, "learning_rate": 2.262206896551724e-05, "loss": 0.3632, "step": 5471 }, { "epoch": 18.868965517241378, "grad_norm": 0.5751904249191284, "learning_rate": 2.2626206896551723e-05, "loss": 0.3168, "step": 5472 }, { "epoch": 18.872413793103448, "grad_norm": 1.1206716299057007, "learning_rate": 2.263034482758621e-05, "loss": 0.3659, "step": 5473 }, { "epoch": 18.875862068965517, "grad_norm": 0.8191505074501038, "learning_rate": 2.263448275862069e-05, "loss": 0.3228, "step": 5474 }, { "epoch": 18.879310344827587, "grad_norm": 0.690346360206604, "learning_rate": 2.2638620689655173e-05, "loss": 0.319, "step": 5475 }, { "epoch": 18.882758620689657, "grad_norm": 0.8737021088600159, "learning_rate": 2.2642758620689655e-05, "loss": 0.3057, "step": 5476 }, { "epoch": 18.886206896551723, "grad_norm": 0.6653792262077332, "learning_rate": 2.264689655172414e-05, "loss": 0.2928, "step": 5477 }, { "epoch": 18.889655172413793, "grad_norm": 0.7919082641601562, "learning_rate": 2.2651034482758622e-05, "loss": 0.2772, "step": 5478 }, { "epoch": 18.893103448275863, "grad_norm": 0.7352131605148315, "learning_rate": 2.2655172413793104e-05, "loss": 0.2764, "step": 5479 }, { "epoch": 18.896551724137932, "grad_norm": 0.8095424175262451, "learning_rate": 2.2659310344827586e-05, "loss": 0.2938, "step": 5480 }, { "epoch": 18.9, "grad_norm": 0.7306602597236633, "learning_rate": 2.2663448275862068e-05, "loss": 0.2394, "step": 5481 }, { "epoch": 18.90344827586207, "grad_norm": 1.2895386219024658, "learning_rate": 2.2667586206896554e-05, "loss": 0.2886, "step": 5482 }, { "epoch": 18.906896551724138, "grad_norm": 1.2687749862670898, "learning_rate": 2.2671724137931032e-05, "loss": 0.267, "step": 5483 }, { "epoch": 18.910344827586208, "grad_norm": 1.4556760787963867, "learning_rate": 2.2675862068965518e-05, "loss": 0.264, "step": 5484 }, { "epoch": 18.913793103448278, "grad_norm": 1.4589190483093262, "learning_rate": 2.268e-05, "loss": 0.2821, "step": 5485 }, { "epoch": 18.917241379310344, "grad_norm": 1.4368747472763062, "learning_rate": 2.2684137931034485e-05, "loss": 0.2532, "step": 5486 }, { "epoch": 18.920689655172414, "grad_norm": 1.2917131185531616, "learning_rate": 2.2688275862068964e-05, "loss": 0.2828, "step": 5487 }, { "epoch": 18.924137931034483, "grad_norm": 1.0584832429885864, "learning_rate": 2.269241379310345e-05, "loss": 0.2734, "step": 5488 }, { "epoch": 18.927586206896553, "grad_norm": 0.787033200263977, "learning_rate": 2.269655172413793e-05, "loss": 0.286, "step": 5489 }, { "epoch": 18.93103448275862, "grad_norm": 0.8171913027763367, "learning_rate": 2.2700689655172417e-05, "loss": 0.2579, "step": 5490 }, { "epoch": 18.93448275862069, "grad_norm": 1.4233152866363525, "learning_rate": 2.2704827586206895e-05, "loss": 0.2823, "step": 5491 }, { "epoch": 18.93793103448276, "grad_norm": 1.3032225370407104, "learning_rate": 2.270896551724138e-05, "loss": 0.2812, "step": 5492 }, { "epoch": 18.94137931034483, "grad_norm": 1.250840663909912, "learning_rate": 2.2713103448275863e-05, "loss": 0.3172, "step": 5493 }, { "epoch": 18.944827586206898, "grad_norm": 1.2026050090789795, "learning_rate": 2.2717241379310345e-05, "loss": 0.3366, "step": 5494 }, { "epoch": 18.948275862068964, "grad_norm": 3.037968158721924, "learning_rate": 2.272137931034483e-05, "loss": 0.4173, "step": 5495 }, { "epoch": 18.951724137931034, "grad_norm": 1.048311710357666, "learning_rate": 2.272551724137931e-05, "loss": 0.3099, "step": 5496 }, { "epoch": 18.955172413793104, "grad_norm": 0.6816616058349609, "learning_rate": 2.2729655172413794e-05, "loss": 0.2762, "step": 5497 }, { "epoch": 18.958620689655174, "grad_norm": 1.0867946147918701, "learning_rate": 2.2733793103448276e-05, "loss": 0.2778, "step": 5498 }, { "epoch": 18.96206896551724, "grad_norm": 1.2309067249298096, "learning_rate": 2.273793103448276e-05, "loss": 0.3065, "step": 5499 }, { "epoch": 18.96551724137931, "grad_norm": 0.7601896524429321, "learning_rate": 2.274206896551724e-05, "loss": 0.2671, "step": 5500 }, { "epoch": 18.96896551724138, "grad_norm": 0.8269813656806946, "learning_rate": 2.2746206896551726e-05, "loss": 0.2571, "step": 5501 }, { "epoch": 18.97241379310345, "grad_norm": 1.3528765439987183, "learning_rate": 2.2750344827586208e-05, "loss": 0.2895, "step": 5502 }, { "epoch": 18.97586206896552, "grad_norm": 1.5377795696258545, "learning_rate": 2.2754482758620693e-05, "loss": 0.2937, "step": 5503 }, { "epoch": 18.979310344827585, "grad_norm": 0.9579751491546631, "learning_rate": 2.275862068965517e-05, "loss": 0.2985, "step": 5504 }, { "epoch": 18.982758620689655, "grad_norm": 0.8037461042404175, "learning_rate": 2.2762758620689657e-05, "loss": 0.2437, "step": 5505 }, { "epoch": 18.986206896551725, "grad_norm": 1.3790432214736938, "learning_rate": 2.276689655172414e-05, "loss": 0.258, "step": 5506 }, { "epoch": 18.989655172413794, "grad_norm": 0.8855153918266296, "learning_rate": 2.277103448275862e-05, "loss": 0.2594, "step": 5507 }, { "epoch": 18.99310344827586, "grad_norm": 0.8292520046234131, "learning_rate": 2.2775172413793103e-05, "loss": 0.2667, "step": 5508 }, { "epoch": 18.99655172413793, "grad_norm": 1.0953699350357056, "learning_rate": 2.2779310344827585e-05, "loss": 0.3141, "step": 5509 }, { "epoch": 19.0, "grad_norm": 1.2066729068756104, "learning_rate": 2.278344827586207e-05, "loss": 0.4357, "step": 5510 }, { "epoch": 19.00344827586207, "grad_norm": 0.7250059843063354, "learning_rate": 2.2787586206896552e-05, "loss": 0.3324, "step": 5511 }, { "epoch": 19.00689655172414, "grad_norm": 0.8543237447738647, "learning_rate": 2.2791724137931035e-05, "loss": 0.3073, "step": 5512 }, { "epoch": 19.010344827586206, "grad_norm": 1.0668880939483643, "learning_rate": 2.2795862068965517e-05, "loss": 0.3259, "step": 5513 }, { "epoch": 19.013793103448275, "grad_norm": 1.8236055374145508, "learning_rate": 2.2800000000000002e-05, "loss": 0.2565, "step": 5514 }, { "epoch": 19.017241379310345, "grad_norm": 0.8564413189888, "learning_rate": 2.2804137931034484e-05, "loss": 0.2801, "step": 5515 }, { "epoch": 19.020689655172415, "grad_norm": 0.8810932040214539, "learning_rate": 2.2808275862068966e-05, "loss": 0.2609, "step": 5516 }, { "epoch": 19.02413793103448, "grad_norm": 0.7817314863204956, "learning_rate": 2.2812413793103448e-05, "loss": 0.2704, "step": 5517 }, { "epoch": 19.02758620689655, "grad_norm": 0.7843282222747803, "learning_rate": 2.2816551724137933e-05, "loss": 0.2595, "step": 5518 }, { "epoch": 19.03103448275862, "grad_norm": 1.346493124961853, "learning_rate": 2.2820689655172415e-05, "loss": 0.2788, "step": 5519 }, { "epoch": 19.03448275862069, "grad_norm": 0.7476693987846375, "learning_rate": 2.2824827586206894e-05, "loss": 0.2503, "step": 5520 }, { "epoch": 19.03793103448276, "grad_norm": 1.2612805366516113, "learning_rate": 2.282896551724138e-05, "loss": 0.2804, "step": 5521 }, { "epoch": 19.041379310344826, "grad_norm": 0.8517665266990662, "learning_rate": 2.283310344827586e-05, "loss": 0.2741, "step": 5522 }, { "epoch": 19.044827586206896, "grad_norm": 1.1059577465057373, "learning_rate": 2.2837241379310347e-05, "loss": 0.2584, "step": 5523 }, { "epoch": 19.048275862068966, "grad_norm": 0.740127444267273, "learning_rate": 2.2841379310344826e-05, "loss": 0.2692, "step": 5524 }, { "epoch": 19.051724137931036, "grad_norm": 0.706718385219574, "learning_rate": 2.284551724137931e-05, "loss": 0.2791, "step": 5525 }, { "epoch": 19.055172413793102, "grad_norm": 0.7860538959503174, "learning_rate": 2.2849655172413793e-05, "loss": 0.2393, "step": 5526 }, { "epoch": 19.05862068965517, "grad_norm": 0.9318333268165588, "learning_rate": 2.285379310344828e-05, "loss": 0.2878, "step": 5527 }, { "epoch": 19.06206896551724, "grad_norm": 0.6997091174125671, "learning_rate": 2.285793103448276e-05, "loss": 0.2477, "step": 5528 }, { "epoch": 19.06551724137931, "grad_norm": 1.007441759109497, "learning_rate": 2.2862068965517242e-05, "loss": 0.2623, "step": 5529 }, { "epoch": 19.06896551724138, "grad_norm": 0.9568666815757751, "learning_rate": 2.2866206896551724e-05, "loss": 0.2485, "step": 5530 }, { "epoch": 19.072413793103447, "grad_norm": 0.8607508540153503, "learning_rate": 2.287034482758621e-05, "loss": 0.3175, "step": 5531 }, { "epoch": 19.075862068965517, "grad_norm": 1.0138752460479736, "learning_rate": 2.2874482758620692e-05, "loss": 0.2476, "step": 5532 }, { "epoch": 19.079310344827586, "grad_norm": 0.9690278768539429, "learning_rate": 2.287862068965517e-05, "loss": 0.2752, "step": 5533 }, { "epoch": 19.082758620689656, "grad_norm": 1.3618175983428955, "learning_rate": 2.2882758620689656e-05, "loss": 0.3011, "step": 5534 }, { "epoch": 19.086206896551722, "grad_norm": 1.472653865814209, "learning_rate": 2.2886896551724138e-05, "loss": 0.3756, "step": 5535 }, { "epoch": 19.089655172413792, "grad_norm": 0.5345167517662048, "learning_rate": 2.2891034482758623e-05, "loss": 0.2865, "step": 5536 }, { "epoch": 19.093103448275862, "grad_norm": 0.5620251297950745, "learning_rate": 2.2895172413793102e-05, "loss": 0.3132, "step": 5537 }, { "epoch": 19.09655172413793, "grad_norm": 0.5771294236183167, "learning_rate": 2.2899310344827587e-05, "loss": 0.3382, "step": 5538 }, { "epoch": 19.1, "grad_norm": 1.1807019710540771, "learning_rate": 2.290344827586207e-05, "loss": 0.3001, "step": 5539 }, { "epoch": 19.103448275862068, "grad_norm": 0.7353456616401672, "learning_rate": 2.2907586206896555e-05, "loss": 0.2713, "step": 5540 }, { "epoch": 19.106896551724137, "grad_norm": 0.8032722473144531, "learning_rate": 2.2911724137931033e-05, "loss": 0.2839, "step": 5541 }, { "epoch": 19.110344827586207, "grad_norm": 1.7329394817352295, "learning_rate": 2.291586206896552e-05, "loss": 0.2729, "step": 5542 }, { "epoch": 19.113793103448277, "grad_norm": 0.7458354830741882, "learning_rate": 2.292e-05, "loss": 0.2679, "step": 5543 }, { "epoch": 19.117241379310343, "grad_norm": 0.7188073396682739, "learning_rate": 2.2924137931034486e-05, "loss": 0.2737, "step": 5544 }, { "epoch": 19.120689655172413, "grad_norm": 0.849056601524353, "learning_rate": 2.2928275862068965e-05, "loss": 0.2463, "step": 5545 }, { "epoch": 19.124137931034483, "grad_norm": 0.9451251029968262, "learning_rate": 2.2932413793103447e-05, "loss": 0.2491, "step": 5546 }, { "epoch": 19.127586206896552, "grad_norm": 1.0158774852752686, "learning_rate": 2.2936551724137932e-05, "loss": 0.3244, "step": 5547 }, { "epoch": 19.131034482758622, "grad_norm": 0.7775508761405945, "learning_rate": 2.2940689655172414e-05, "loss": 0.2647, "step": 5548 }, { "epoch": 19.13448275862069, "grad_norm": 0.6276335120201111, "learning_rate": 2.2944827586206896e-05, "loss": 0.2692, "step": 5549 }, { "epoch": 19.137931034482758, "grad_norm": 1.1415958404541016, "learning_rate": 2.2948965517241378e-05, "loss": 0.2908, "step": 5550 }, { "epoch": 19.141379310344828, "grad_norm": 1.2091315984725952, "learning_rate": 2.2953103448275864e-05, "loss": 0.2815, "step": 5551 }, { "epoch": 19.144827586206898, "grad_norm": 1.0676440000534058, "learning_rate": 2.2957241379310346e-05, "loss": 0.2607, "step": 5552 }, { "epoch": 19.148275862068967, "grad_norm": 1.3118976354599, "learning_rate": 2.2961379310344828e-05, "loss": 0.2576, "step": 5553 }, { "epoch": 19.151724137931033, "grad_norm": 0.9270119071006775, "learning_rate": 2.296551724137931e-05, "loss": 0.2539, "step": 5554 }, { "epoch": 19.155172413793103, "grad_norm": 1.0422191619873047, "learning_rate": 2.2969655172413795e-05, "loss": 0.2522, "step": 5555 }, { "epoch": 19.158620689655173, "grad_norm": 1.363730788230896, "learning_rate": 2.2973793103448277e-05, "loss": 0.2449, "step": 5556 }, { "epoch": 19.162068965517243, "grad_norm": 0.8320056796073914, "learning_rate": 2.297793103448276e-05, "loss": 0.2437, "step": 5557 }, { "epoch": 19.16551724137931, "grad_norm": 1.0026248693466187, "learning_rate": 2.298206896551724e-05, "loss": 0.2643, "step": 5558 }, { "epoch": 19.16896551724138, "grad_norm": 1.2879555225372314, "learning_rate": 2.2986206896551723e-05, "loss": 0.2706, "step": 5559 }, { "epoch": 19.17241379310345, "grad_norm": 2.5129079818725586, "learning_rate": 2.299034482758621e-05, "loss": 0.402, "step": 5560 }, { "epoch": 19.175862068965518, "grad_norm": 1.3114101886749268, "learning_rate": 2.299448275862069e-05, "loss": 0.3465, "step": 5561 }, { "epoch": 19.179310344827588, "grad_norm": 0.617169976234436, "learning_rate": 2.2998620689655173e-05, "loss": 0.2879, "step": 5562 }, { "epoch": 19.182758620689654, "grad_norm": 0.6920482516288757, "learning_rate": 2.3002758620689655e-05, "loss": 0.3071, "step": 5563 }, { "epoch": 19.186206896551724, "grad_norm": 1.3271194696426392, "learning_rate": 2.300689655172414e-05, "loss": 0.2968, "step": 5564 }, { "epoch": 19.189655172413794, "grad_norm": 1.8174065351486206, "learning_rate": 2.3011034482758622e-05, "loss": 0.3045, "step": 5565 }, { "epoch": 19.193103448275863, "grad_norm": 0.6806783080101013, "learning_rate": 2.3015172413793104e-05, "loss": 0.2552, "step": 5566 }, { "epoch": 19.19655172413793, "grad_norm": 0.8256332278251648, "learning_rate": 2.3019310344827586e-05, "loss": 0.26, "step": 5567 }, { "epoch": 19.2, "grad_norm": 0.567965567111969, "learning_rate": 2.302344827586207e-05, "loss": 0.311, "step": 5568 }, { "epoch": 19.20344827586207, "grad_norm": 0.9481228590011597, "learning_rate": 2.3027586206896554e-05, "loss": 0.2814, "step": 5569 }, { "epoch": 19.20689655172414, "grad_norm": 1.1629655361175537, "learning_rate": 2.3031724137931036e-05, "loss": 0.287, "step": 5570 }, { "epoch": 19.21034482758621, "grad_norm": 0.6065325140953064, "learning_rate": 2.3035862068965518e-05, "loss": 0.2481, "step": 5571 }, { "epoch": 19.213793103448275, "grad_norm": 0.8927692174911499, "learning_rate": 2.304e-05, "loss": 0.2841, "step": 5572 }, { "epoch": 19.217241379310344, "grad_norm": 0.6482217907905579, "learning_rate": 2.3044137931034485e-05, "loss": 0.2751, "step": 5573 }, { "epoch": 19.220689655172414, "grad_norm": 0.7826296091079712, "learning_rate": 2.3048275862068964e-05, "loss": 0.2416, "step": 5574 }, { "epoch": 19.224137931034484, "grad_norm": 0.9087565541267395, "learning_rate": 2.305241379310345e-05, "loss": 0.274, "step": 5575 }, { "epoch": 19.22758620689655, "grad_norm": 0.7581960558891296, "learning_rate": 2.305655172413793e-05, "loss": 0.2348, "step": 5576 }, { "epoch": 19.23103448275862, "grad_norm": 0.8122885823249817, "learning_rate": 2.3060689655172417e-05, "loss": 0.2737, "step": 5577 }, { "epoch": 19.23448275862069, "grad_norm": 1.2170257568359375, "learning_rate": 2.3064827586206895e-05, "loss": 0.2702, "step": 5578 }, { "epoch": 19.23793103448276, "grad_norm": 1.3396179676055908, "learning_rate": 2.306896551724138e-05, "loss": 0.2508, "step": 5579 }, { "epoch": 19.24137931034483, "grad_norm": 1.4869052171707153, "learning_rate": 2.3073103448275863e-05, "loss": 0.2415, "step": 5580 }, { "epoch": 19.244827586206895, "grad_norm": 1.078561782836914, "learning_rate": 2.3077241379310348e-05, "loss": 0.2636, "step": 5581 }, { "epoch": 19.248275862068965, "grad_norm": 1.4500601291656494, "learning_rate": 2.3081379310344827e-05, "loss": 0.2754, "step": 5582 }, { "epoch": 19.251724137931035, "grad_norm": 0.8806644678115845, "learning_rate": 2.308551724137931e-05, "loss": 0.2772, "step": 5583 }, { "epoch": 19.255172413793105, "grad_norm": 1.3163529634475708, "learning_rate": 2.3089655172413794e-05, "loss": 0.3319, "step": 5584 }, { "epoch": 19.25862068965517, "grad_norm": 1.6446634531021118, "learning_rate": 2.3093793103448276e-05, "loss": 0.3722, "step": 5585 }, { "epoch": 19.26206896551724, "grad_norm": 0.6783074736595154, "learning_rate": 2.3097931034482758e-05, "loss": 0.3224, "step": 5586 }, { "epoch": 19.26551724137931, "grad_norm": 1.0234757661819458, "learning_rate": 2.310206896551724e-05, "loss": 0.3258, "step": 5587 }, { "epoch": 19.26896551724138, "grad_norm": 0.4566297233104706, "learning_rate": 2.3106206896551725e-05, "loss": 0.2683, "step": 5588 }, { "epoch": 19.27241379310345, "grad_norm": 0.5641801953315735, "learning_rate": 2.3110344827586207e-05, "loss": 0.2662, "step": 5589 }, { "epoch": 19.275862068965516, "grad_norm": 0.7048805952072144, "learning_rate": 2.311448275862069e-05, "loss": 0.267, "step": 5590 }, { "epoch": 19.279310344827586, "grad_norm": 0.610844612121582, "learning_rate": 2.311862068965517e-05, "loss": 0.3136, "step": 5591 }, { "epoch": 19.282758620689656, "grad_norm": 0.5865924954414368, "learning_rate": 2.3122758620689657e-05, "loss": 0.2824, "step": 5592 }, { "epoch": 19.286206896551725, "grad_norm": 0.7363469004631042, "learning_rate": 2.312689655172414e-05, "loss": 0.2655, "step": 5593 }, { "epoch": 19.28965517241379, "grad_norm": 0.6581054329872131, "learning_rate": 2.3131034482758624e-05, "loss": 0.2612, "step": 5594 }, { "epoch": 19.29310344827586, "grad_norm": 1.4020252227783203, "learning_rate": 2.3135172413793103e-05, "loss": 0.2447, "step": 5595 }, { "epoch": 19.29655172413793, "grad_norm": 1.0283812284469604, "learning_rate": 2.3139310344827585e-05, "loss": 0.2583, "step": 5596 }, { "epoch": 19.3, "grad_norm": 1.0782355070114136, "learning_rate": 2.314344827586207e-05, "loss": 0.2838, "step": 5597 }, { "epoch": 19.30344827586207, "grad_norm": 0.6761720776557922, "learning_rate": 2.3147586206896552e-05, "loss": 0.2578, "step": 5598 }, { "epoch": 19.306896551724137, "grad_norm": 0.7633607387542725, "learning_rate": 2.3151724137931034e-05, "loss": 0.2706, "step": 5599 }, { "epoch": 19.310344827586206, "grad_norm": 1.8563719987869263, "learning_rate": 2.3155862068965516e-05, "loss": 0.284, "step": 5600 }, { "epoch": 19.313793103448276, "grad_norm": 2.0416207313537598, "learning_rate": 2.3160000000000002e-05, "loss": 0.2494, "step": 5601 }, { "epoch": 19.317241379310346, "grad_norm": 0.7485294342041016, "learning_rate": 2.3164137931034484e-05, "loss": 0.2494, "step": 5602 }, { "epoch": 19.320689655172412, "grad_norm": 0.9446775317192078, "learning_rate": 2.3168275862068966e-05, "loss": 0.2347, "step": 5603 }, { "epoch": 19.324137931034482, "grad_norm": 1.8247169256210327, "learning_rate": 2.3172413793103448e-05, "loss": 0.295, "step": 5604 }, { "epoch": 19.32758620689655, "grad_norm": 0.771604597568512, "learning_rate": 2.3176551724137933e-05, "loss": 0.2412, "step": 5605 }, { "epoch": 19.33103448275862, "grad_norm": 1.4343595504760742, "learning_rate": 2.3180689655172415e-05, "loss": 0.3018, "step": 5606 }, { "epoch": 19.33448275862069, "grad_norm": 0.9017931818962097, "learning_rate": 2.3184827586206897e-05, "loss": 0.2771, "step": 5607 }, { "epoch": 19.337931034482757, "grad_norm": 1.0792618989944458, "learning_rate": 2.318896551724138e-05, "loss": 0.2936, "step": 5608 }, { "epoch": 19.341379310344827, "grad_norm": 1.273684024810791, "learning_rate": 2.319310344827586e-05, "loss": 0.308, "step": 5609 }, { "epoch": 19.344827586206897, "grad_norm": 1.277341604232788, "learning_rate": 2.3197241379310347e-05, "loss": 0.3649, "step": 5610 }, { "epoch": 19.348275862068967, "grad_norm": 0.9168273210525513, "learning_rate": 2.3201379310344825e-05, "loss": 0.3064, "step": 5611 }, { "epoch": 19.351724137931033, "grad_norm": 0.8478251099586487, "learning_rate": 2.320551724137931e-05, "loss": 0.2787, "step": 5612 }, { "epoch": 19.355172413793102, "grad_norm": 0.8380334973335266, "learning_rate": 2.3209655172413793e-05, "loss": 0.3033, "step": 5613 }, { "epoch": 19.358620689655172, "grad_norm": 0.983158528804779, "learning_rate": 2.3213793103448278e-05, "loss": 0.2989, "step": 5614 }, { "epoch": 19.362068965517242, "grad_norm": 0.9276583194732666, "learning_rate": 2.3217931034482757e-05, "loss": 0.3093, "step": 5615 }, { "epoch": 19.36551724137931, "grad_norm": 0.6382678151130676, "learning_rate": 2.3222068965517242e-05, "loss": 0.2703, "step": 5616 }, { "epoch": 19.368965517241378, "grad_norm": 1.3062987327575684, "learning_rate": 2.3226206896551724e-05, "loss": 0.2851, "step": 5617 }, { "epoch": 19.372413793103448, "grad_norm": 1.0386908054351807, "learning_rate": 2.323034482758621e-05, "loss": 0.2762, "step": 5618 }, { "epoch": 19.375862068965517, "grad_norm": 0.7260191440582275, "learning_rate": 2.323448275862069e-05, "loss": 0.2537, "step": 5619 }, { "epoch": 19.379310344827587, "grad_norm": 1.078510046005249, "learning_rate": 2.3238620689655174e-05, "loss": 0.292, "step": 5620 }, { "epoch": 19.382758620689657, "grad_norm": 0.9238402247428894, "learning_rate": 2.3242758620689656e-05, "loss": 0.2873, "step": 5621 }, { "epoch": 19.386206896551723, "grad_norm": 0.6782797574996948, "learning_rate": 2.3246896551724138e-05, "loss": 0.2742, "step": 5622 }, { "epoch": 19.389655172413793, "grad_norm": 1.347440242767334, "learning_rate": 2.325103448275862e-05, "loss": 0.3097, "step": 5623 }, { "epoch": 19.393103448275863, "grad_norm": 1.5175658464431763, "learning_rate": 2.3255172413793102e-05, "loss": 0.2634, "step": 5624 }, { "epoch": 19.396551724137932, "grad_norm": 0.939930260181427, "learning_rate": 2.3259310344827587e-05, "loss": 0.2707, "step": 5625 }, { "epoch": 19.4, "grad_norm": 0.728266179561615, "learning_rate": 2.326344827586207e-05, "loss": 0.2419, "step": 5626 }, { "epoch": 19.40344827586207, "grad_norm": 1.7844791412353516, "learning_rate": 2.3267586206896555e-05, "loss": 0.2826, "step": 5627 }, { "epoch": 19.406896551724138, "grad_norm": 1.246944546699524, "learning_rate": 2.3271724137931033e-05, "loss": 0.3146, "step": 5628 }, { "epoch": 19.410344827586208, "grad_norm": 0.8280079364776611, "learning_rate": 2.327586206896552e-05, "loss": 0.2848, "step": 5629 }, { "epoch": 19.413793103448278, "grad_norm": 1.0097185373306274, "learning_rate": 2.328e-05, "loss": 0.265, "step": 5630 }, { "epoch": 19.417241379310344, "grad_norm": 1.0335825681686401, "learning_rate": 2.3284137931034486e-05, "loss": 0.3013, "step": 5631 }, { "epoch": 19.420689655172414, "grad_norm": 0.8476930856704712, "learning_rate": 2.3288275862068965e-05, "loss": 0.2557, "step": 5632 }, { "epoch": 19.424137931034483, "grad_norm": 1.043220043182373, "learning_rate": 2.329241379310345e-05, "loss": 0.2679, "step": 5633 }, { "epoch": 19.427586206896553, "grad_norm": 2.957110643386841, "learning_rate": 2.3296551724137932e-05, "loss": 0.3044, "step": 5634 }, { "epoch": 19.43103448275862, "grad_norm": 11.793370246887207, "learning_rate": 2.3300689655172414e-05, "loss": 0.431, "step": 5635 }, { "epoch": 19.43448275862069, "grad_norm": 0.9608220458030701, "learning_rate": 2.3304827586206896e-05, "loss": 0.3252, "step": 5636 }, { "epoch": 19.43793103448276, "grad_norm": 0.8872681260108948, "learning_rate": 2.3308965517241378e-05, "loss": 0.2607, "step": 5637 }, { "epoch": 19.44137931034483, "grad_norm": 1.303954839706421, "learning_rate": 2.3313103448275864e-05, "loss": 0.281, "step": 5638 }, { "epoch": 19.444827586206898, "grad_norm": 1.081660270690918, "learning_rate": 2.3317241379310346e-05, "loss": 0.2906, "step": 5639 }, { "epoch": 19.448275862068964, "grad_norm": 0.8935769200325012, "learning_rate": 2.3321379310344828e-05, "loss": 0.2899, "step": 5640 }, { "epoch": 19.451724137931034, "grad_norm": 1.083427906036377, "learning_rate": 2.332551724137931e-05, "loss": 0.2591, "step": 5641 }, { "epoch": 19.455172413793104, "grad_norm": 0.6280823349952698, "learning_rate": 2.3329655172413795e-05, "loss": 0.3124, "step": 5642 }, { "epoch": 19.458620689655174, "grad_norm": 0.6222635507583618, "learning_rate": 2.3333793103448277e-05, "loss": 0.299, "step": 5643 }, { "epoch": 19.46206896551724, "grad_norm": 0.9043587446212769, "learning_rate": 2.333793103448276e-05, "loss": 0.2765, "step": 5644 }, { "epoch": 19.46551724137931, "grad_norm": 0.7138671278953552, "learning_rate": 2.334206896551724e-05, "loss": 0.2322, "step": 5645 }, { "epoch": 19.46896551724138, "grad_norm": 0.7725844383239746, "learning_rate": 2.3346206896551727e-05, "loss": 0.2552, "step": 5646 }, { "epoch": 19.47241379310345, "grad_norm": 3.2660717964172363, "learning_rate": 2.335034482758621e-05, "loss": 0.2572, "step": 5647 }, { "epoch": 19.47586206896552, "grad_norm": 0.8228668570518494, "learning_rate": 2.3354482758620687e-05, "loss": 0.2568, "step": 5648 }, { "epoch": 19.479310344827585, "grad_norm": 3.0134270191192627, "learning_rate": 2.3358620689655173e-05, "loss": 0.2596, "step": 5649 }, { "epoch": 19.482758620689655, "grad_norm": 0.7211872935295105, "learning_rate": 2.3362758620689655e-05, "loss": 0.2624, "step": 5650 }, { "epoch": 19.486206896551725, "grad_norm": 0.9586041569709778, "learning_rate": 2.336689655172414e-05, "loss": 0.258, "step": 5651 }, { "epoch": 19.489655172413794, "grad_norm": 0.8691920042037964, "learning_rate": 2.337103448275862e-05, "loss": 0.26, "step": 5652 }, { "epoch": 19.49310344827586, "grad_norm": 1.0895354747772217, "learning_rate": 2.3375172413793104e-05, "loss": 0.2955, "step": 5653 }, { "epoch": 19.49655172413793, "grad_norm": 3.1528100967407227, "learning_rate": 2.3379310344827586e-05, "loss": 0.2293, "step": 5654 }, { "epoch": 19.5, "grad_norm": 1.023695707321167, "learning_rate": 2.338344827586207e-05, "loss": 0.2594, "step": 5655 }, { "epoch": 19.50344827586207, "grad_norm": 1.1192139387130737, "learning_rate": 2.338758620689655e-05, "loss": 0.2566, "step": 5656 }, { "epoch": 19.50689655172414, "grad_norm": 0.9506075978279114, "learning_rate": 2.3391724137931036e-05, "loss": 0.299, "step": 5657 }, { "epoch": 19.510344827586206, "grad_norm": 0.9447919130325317, "learning_rate": 2.3395862068965518e-05, "loss": 0.2721, "step": 5658 }, { "epoch": 19.513793103448275, "grad_norm": 1.2381452322006226, "learning_rate": 2.3400000000000003e-05, "loss": 0.3142, "step": 5659 }, { "epoch": 19.517241379310345, "grad_norm": 8.407148361206055, "learning_rate": 2.3404137931034485e-05, "loss": 0.3778, "step": 5660 }, { "epoch": 19.520689655172415, "grad_norm": 0.9169996380805969, "learning_rate": 2.3408275862068964e-05, "loss": 0.3173, "step": 5661 }, { "epoch": 19.52413793103448, "grad_norm": 0.7710758447647095, "learning_rate": 2.341241379310345e-05, "loss": 0.2871, "step": 5662 }, { "epoch": 19.52758620689655, "grad_norm": 0.5682987570762634, "learning_rate": 2.341655172413793e-05, "loss": 0.2686, "step": 5663 }, { "epoch": 19.53103448275862, "grad_norm": 0.8372921943664551, "learning_rate": 2.3420689655172416e-05, "loss": 0.2818, "step": 5664 }, { "epoch": 19.53448275862069, "grad_norm": 1.1204626560211182, "learning_rate": 2.3424827586206895e-05, "loss": 0.2608, "step": 5665 }, { "epoch": 19.53793103448276, "grad_norm": 0.8097769618034363, "learning_rate": 2.342896551724138e-05, "loss": 0.2628, "step": 5666 }, { "epoch": 19.541379310344826, "grad_norm": 0.5468595623970032, "learning_rate": 2.3433103448275862e-05, "loss": 0.2878, "step": 5667 }, { "epoch": 19.544827586206896, "grad_norm": 1.1430665254592896, "learning_rate": 2.3437241379310348e-05, "loss": 0.3064, "step": 5668 }, { "epoch": 19.548275862068966, "grad_norm": 0.7791063785552979, "learning_rate": 2.3441379310344827e-05, "loss": 0.3027, "step": 5669 }, { "epoch": 19.551724137931036, "grad_norm": 0.7038488388061523, "learning_rate": 2.3445517241379312e-05, "loss": 0.2834, "step": 5670 }, { "epoch": 19.555172413793102, "grad_norm": 1.1918556690216064, "learning_rate": 2.3449655172413794e-05, "loss": 0.2801, "step": 5671 }, { "epoch": 19.55862068965517, "grad_norm": 0.7333104014396667, "learning_rate": 2.345379310344828e-05, "loss": 0.3109, "step": 5672 }, { "epoch": 19.56206896551724, "grad_norm": 1.0190019607543945, "learning_rate": 2.3457931034482758e-05, "loss": 0.278, "step": 5673 }, { "epoch": 19.56551724137931, "grad_norm": 1.721420407295227, "learning_rate": 2.346206896551724e-05, "loss": 0.277, "step": 5674 }, { "epoch": 19.56896551724138, "grad_norm": 0.6660922765731812, "learning_rate": 2.3466206896551725e-05, "loss": 0.2717, "step": 5675 }, { "epoch": 19.572413793103447, "grad_norm": 0.8518005013465881, "learning_rate": 2.3470344827586207e-05, "loss": 0.2299, "step": 5676 }, { "epoch": 19.575862068965517, "grad_norm": 3.9174723625183105, "learning_rate": 2.347448275862069e-05, "loss": 0.2783, "step": 5677 }, { "epoch": 19.579310344827586, "grad_norm": 0.9594706296920776, "learning_rate": 2.347862068965517e-05, "loss": 0.2392, "step": 5678 }, { "epoch": 19.582758620689656, "grad_norm": 2.219055414199829, "learning_rate": 2.3482758620689657e-05, "loss": 0.2389, "step": 5679 }, { "epoch": 19.586206896551722, "grad_norm": 1.0903708934783936, "learning_rate": 2.348689655172414e-05, "loss": 0.2886, "step": 5680 }, { "epoch": 19.589655172413792, "grad_norm": 0.8044618368148804, "learning_rate": 2.349103448275862e-05, "loss": 0.2444, "step": 5681 }, { "epoch": 19.593103448275862, "grad_norm": 1.1745916604995728, "learning_rate": 2.3495172413793103e-05, "loss": 0.258, "step": 5682 }, { "epoch": 19.59655172413793, "grad_norm": 2.02158522605896, "learning_rate": 2.349931034482759e-05, "loss": 0.276, "step": 5683 }, { "epoch": 19.6, "grad_norm": 1.7028814554214478, "learning_rate": 2.350344827586207e-05, "loss": 0.3123, "step": 5684 }, { "epoch": 19.603448275862068, "grad_norm": 1.7622395753860474, "learning_rate": 2.3507586206896552e-05, "loss": 0.3797, "step": 5685 }, { "epoch": 19.606896551724137, "grad_norm": 0.7590331435203552, "learning_rate": 2.3511724137931034e-05, "loss": 0.3234, "step": 5686 }, { "epoch": 19.610344827586207, "grad_norm": 0.9913783073425293, "learning_rate": 2.3515862068965516e-05, "loss": 0.2651, "step": 5687 }, { "epoch": 19.613793103448277, "grad_norm": 0.7847728729248047, "learning_rate": 2.3520000000000002e-05, "loss": 0.2556, "step": 5688 }, { "epoch": 19.617241379310343, "grad_norm": 0.8161184191703796, "learning_rate": 2.352413793103448e-05, "loss": 0.2741, "step": 5689 }, { "epoch": 19.620689655172413, "grad_norm": 0.6787846684455872, "learning_rate": 2.3528275862068966e-05, "loss": 0.2761, "step": 5690 }, { "epoch": 19.624137931034483, "grad_norm": 0.6041955947875977, "learning_rate": 2.3532413793103448e-05, "loss": 0.2653, "step": 5691 }, { "epoch": 19.627586206896552, "grad_norm": 0.6574248671531677, "learning_rate": 2.3536551724137933e-05, "loss": 0.2747, "step": 5692 }, { "epoch": 19.631034482758622, "grad_norm": 0.8645961880683899, "learning_rate": 2.3540689655172415e-05, "loss": 0.2707, "step": 5693 }, { "epoch": 19.63448275862069, "grad_norm": 0.6780698895454407, "learning_rate": 2.3544827586206897e-05, "loss": 0.2451, "step": 5694 }, { "epoch": 19.637931034482758, "grad_norm": 1.292212963104248, "learning_rate": 2.354896551724138e-05, "loss": 0.2461, "step": 5695 }, { "epoch": 19.641379310344828, "grad_norm": 0.6345680952072144, "learning_rate": 2.3553103448275865e-05, "loss": 0.2443, "step": 5696 }, { "epoch": 19.644827586206898, "grad_norm": 0.9408783912658691, "learning_rate": 2.3557241379310347e-05, "loss": 0.2629, "step": 5697 }, { "epoch": 19.648275862068964, "grad_norm": 0.8753872513771057, "learning_rate": 2.356137931034483e-05, "loss": 0.2796, "step": 5698 }, { "epoch": 19.651724137931033, "grad_norm": 0.7035714983940125, "learning_rate": 2.356551724137931e-05, "loss": 0.2541, "step": 5699 }, { "epoch": 19.655172413793103, "grad_norm": 1.038698673248291, "learning_rate": 2.3569655172413793e-05, "loss": 0.2874, "step": 5700 }, { "epoch": 19.658620689655173, "grad_norm": 0.9234484434127808, "learning_rate": 2.3573793103448278e-05, "loss": 0.2542, "step": 5701 }, { "epoch": 19.662068965517243, "grad_norm": 1.1493257284164429, "learning_rate": 2.3577931034482757e-05, "loss": 0.2601, "step": 5702 }, { "epoch": 19.66551724137931, "grad_norm": 0.7554227113723755, "learning_rate": 2.3582068965517242e-05, "loss": 0.2736, "step": 5703 }, { "epoch": 19.66896551724138, "grad_norm": 0.9034656882286072, "learning_rate": 2.3586206896551724e-05, "loss": 0.2605, "step": 5704 }, { "epoch": 19.67241379310345, "grad_norm": 1.25230872631073, "learning_rate": 2.359034482758621e-05, "loss": 0.2511, "step": 5705 }, { "epoch": 19.675862068965518, "grad_norm": 1.053385853767395, "learning_rate": 2.3594482758620688e-05, "loss": 0.2961, "step": 5706 }, { "epoch": 19.679310344827588, "grad_norm": 0.8381445407867432, "learning_rate": 2.3598620689655174e-05, "loss": 0.2896, "step": 5707 }, { "epoch": 19.682758620689654, "grad_norm": 1.461146354675293, "learning_rate": 2.3602758620689656e-05, "loss": 0.2893, "step": 5708 }, { "epoch": 19.686206896551724, "grad_norm": 1.3408633470535278, "learning_rate": 2.360689655172414e-05, "loss": 0.3076, "step": 5709 }, { "epoch": 19.689655172413794, "grad_norm": 2.757241725921631, "learning_rate": 2.361103448275862e-05, "loss": 0.4211, "step": 5710 }, { "epoch": 19.693103448275863, "grad_norm": 0.8659671545028687, "learning_rate": 2.3615172413793105e-05, "loss": 0.3251, "step": 5711 }, { "epoch": 19.69655172413793, "grad_norm": 0.6711256504058838, "learning_rate": 2.3619310344827587e-05, "loss": 0.314, "step": 5712 }, { "epoch": 19.7, "grad_norm": 0.6607583165168762, "learning_rate": 2.362344827586207e-05, "loss": 0.2929, "step": 5713 }, { "epoch": 19.70344827586207, "grad_norm": 0.6553300619125366, "learning_rate": 2.362758620689655e-05, "loss": 0.2895, "step": 5714 }, { "epoch": 19.70689655172414, "grad_norm": 0.921843945980072, "learning_rate": 2.3631724137931033e-05, "loss": 0.2967, "step": 5715 }, { "epoch": 19.71034482758621, "grad_norm": 0.8748689889907837, "learning_rate": 2.363586206896552e-05, "loss": 0.2839, "step": 5716 }, { "epoch": 19.713793103448275, "grad_norm": 0.8940473794937134, "learning_rate": 2.364e-05, "loss": 0.3248, "step": 5717 }, { "epoch": 19.717241379310344, "grad_norm": 0.7639476656913757, "learning_rate": 2.3644137931034483e-05, "loss": 0.2648, "step": 5718 }, { "epoch": 19.720689655172414, "grad_norm": 0.8999742269515991, "learning_rate": 2.3648275862068965e-05, "loss": 0.2473, "step": 5719 }, { "epoch": 19.724137931034484, "grad_norm": 0.6216675639152527, "learning_rate": 2.365241379310345e-05, "loss": 0.2325, "step": 5720 }, { "epoch": 19.72758620689655, "grad_norm": 1.7069205045700073, "learning_rate": 2.3656551724137932e-05, "loss": 0.2746, "step": 5721 }, { "epoch": 19.73103448275862, "grad_norm": 0.5729489922523499, "learning_rate": 2.3660689655172414e-05, "loss": 0.2435, "step": 5722 }, { "epoch": 19.73448275862069, "grad_norm": 0.7027910351753235, "learning_rate": 2.3664827586206896e-05, "loss": 0.244, "step": 5723 }, { "epoch": 19.73793103448276, "grad_norm": 0.8466611504554749, "learning_rate": 2.366896551724138e-05, "loss": 0.2753, "step": 5724 }, { "epoch": 19.74137931034483, "grad_norm": 0.8353732824325562, "learning_rate": 2.3673103448275864e-05, "loss": 0.2381, "step": 5725 }, { "epoch": 19.744827586206895, "grad_norm": 0.8210645318031311, "learning_rate": 2.3677241379310346e-05, "loss": 0.273, "step": 5726 }, { "epoch": 19.748275862068965, "grad_norm": 1.99734628200531, "learning_rate": 2.3681379310344828e-05, "loss": 0.2699, "step": 5727 }, { "epoch": 19.751724137931035, "grad_norm": 1.2472065687179565, "learning_rate": 2.368551724137931e-05, "loss": 0.2673, "step": 5728 }, { "epoch": 19.755172413793105, "grad_norm": 0.6474564075469971, "learning_rate": 2.3689655172413795e-05, "loss": 0.248, "step": 5729 }, { "epoch": 19.75862068965517, "grad_norm": 0.811617910861969, "learning_rate": 2.3693793103448277e-05, "loss": 0.2865, "step": 5730 }, { "epoch": 19.76206896551724, "grad_norm": 1.1003798246383667, "learning_rate": 2.369793103448276e-05, "loss": 0.3098, "step": 5731 }, { "epoch": 19.76551724137931, "grad_norm": 1.3166841268539429, "learning_rate": 2.370206896551724e-05, "loss": 0.2424, "step": 5732 }, { "epoch": 19.76896551724138, "grad_norm": 0.9236419796943665, "learning_rate": 2.3706206896551727e-05, "loss": 0.2602, "step": 5733 }, { "epoch": 19.77241379310345, "grad_norm": 1.2356089353561401, "learning_rate": 2.371034482758621e-05, "loss": 0.2906, "step": 5734 }, { "epoch": 19.775862068965516, "grad_norm": 1.245397686958313, "learning_rate": 2.371448275862069e-05, "loss": 0.3796, "step": 5735 }, { "epoch": 19.779310344827586, "grad_norm": 0.8533849120140076, "learning_rate": 2.3718620689655173e-05, "loss": 0.356, "step": 5736 }, { "epoch": 19.782758620689656, "grad_norm": 0.7510787844657898, "learning_rate": 2.3722758620689658e-05, "loss": 0.3349, "step": 5737 }, { "epoch": 19.786206896551725, "grad_norm": 0.789710283279419, "learning_rate": 2.372689655172414e-05, "loss": 0.2997, "step": 5738 }, { "epoch": 19.78965517241379, "grad_norm": 0.8529327511787415, "learning_rate": 2.373103448275862e-05, "loss": 0.2901, "step": 5739 }, { "epoch": 19.79310344827586, "grad_norm": 0.742501437664032, "learning_rate": 2.3735172413793104e-05, "loss": 0.2928, "step": 5740 }, { "epoch": 19.79655172413793, "grad_norm": 0.814373254776001, "learning_rate": 2.3739310344827586e-05, "loss": 0.2978, "step": 5741 }, { "epoch": 19.8, "grad_norm": 0.9836295247077942, "learning_rate": 2.374344827586207e-05, "loss": 0.2511, "step": 5742 }, { "epoch": 19.80344827586207, "grad_norm": 0.6765544414520264, "learning_rate": 2.374758620689655e-05, "loss": 0.2755, "step": 5743 }, { "epoch": 19.806896551724137, "grad_norm": 1.066344141960144, "learning_rate": 2.3751724137931035e-05, "loss": 0.2815, "step": 5744 }, { "epoch": 19.810344827586206, "grad_norm": 1.1470251083374023, "learning_rate": 2.3755862068965517e-05, "loss": 0.2795, "step": 5745 }, { "epoch": 19.813793103448276, "grad_norm": 0.8730713725090027, "learning_rate": 2.3760000000000003e-05, "loss": 0.2788, "step": 5746 }, { "epoch": 19.817241379310346, "grad_norm": 0.8594866991043091, "learning_rate": 2.376413793103448e-05, "loss": 0.2752, "step": 5747 }, { "epoch": 19.820689655172412, "grad_norm": 1.0644009113311768, "learning_rate": 2.3768275862068967e-05, "loss": 0.2658, "step": 5748 }, { "epoch": 19.824137931034482, "grad_norm": 1.4440611600875854, "learning_rate": 2.377241379310345e-05, "loss": 0.2528, "step": 5749 }, { "epoch": 19.82758620689655, "grad_norm": 1.3611077070236206, "learning_rate": 2.377655172413793e-05, "loss": 0.3003, "step": 5750 }, { "epoch": 19.83103448275862, "grad_norm": 0.6765791773796082, "learning_rate": 2.3780689655172413e-05, "loss": 0.2561, "step": 5751 }, { "epoch": 19.83448275862069, "grad_norm": 0.913952112197876, "learning_rate": 2.3784827586206895e-05, "loss": 0.2875, "step": 5752 }, { "epoch": 19.837931034482757, "grad_norm": 0.8443452715873718, "learning_rate": 2.378896551724138e-05, "loss": 0.2815, "step": 5753 }, { "epoch": 19.841379310344827, "grad_norm": 0.9122913479804993, "learning_rate": 2.3793103448275862e-05, "loss": 0.2616, "step": 5754 }, { "epoch": 19.844827586206897, "grad_norm": 1.0420732498168945, "learning_rate": 2.3797241379310344e-05, "loss": 0.3161, "step": 5755 }, { "epoch": 19.848275862068967, "grad_norm": 1.0721906423568726, "learning_rate": 2.3801379310344826e-05, "loss": 0.2859, "step": 5756 }, { "epoch": 19.851724137931036, "grad_norm": 0.8407171368598938, "learning_rate": 2.3805517241379312e-05, "loss": 0.2411, "step": 5757 }, { "epoch": 19.855172413793102, "grad_norm": 3.3743748664855957, "learning_rate": 2.3809655172413794e-05, "loss": 0.2587, "step": 5758 }, { "epoch": 19.858620689655172, "grad_norm": 1.0639479160308838, "learning_rate": 2.381379310344828e-05, "loss": 0.3124, "step": 5759 }, { "epoch": 19.862068965517242, "grad_norm": 2.5368690490722656, "learning_rate": 2.3817931034482758e-05, "loss": 0.3905, "step": 5760 }, { "epoch": 19.86551724137931, "grad_norm": 1.068859338760376, "learning_rate": 2.3822068965517243e-05, "loss": 0.3588, "step": 5761 }, { "epoch": 19.868965517241378, "grad_norm": 0.6294466853141785, "learning_rate": 2.3826206896551725e-05, "loss": 0.2766, "step": 5762 }, { "epoch": 19.872413793103448, "grad_norm": 0.7079286575317383, "learning_rate": 2.3830344827586207e-05, "loss": 0.2732, "step": 5763 }, { "epoch": 19.875862068965517, "grad_norm": 1.258834719657898, "learning_rate": 2.383448275862069e-05, "loss": 0.3004, "step": 5764 }, { "epoch": 19.879310344827587, "grad_norm": 0.5480318069458008, "learning_rate": 2.383862068965517e-05, "loss": 0.2586, "step": 5765 }, { "epoch": 19.882758620689657, "grad_norm": 1.3730032444000244, "learning_rate": 2.3842758620689657e-05, "loss": 0.294, "step": 5766 }, { "epoch": 19.886206896551723, "grad_norm": 0.7549688816070557, "learning_rate": 2.384689655172414e-05, "loss": 0.2635, "step": 5767 }, { "epoch": 19.889655172413793, "grad_norm": 0.6557541489601135, "learning_rate": 2.385103448275862e-05, "loss": 0.2588, "step": 5768 }, { "epoch": 19.893103448275863, "grad_norm": 0.9909510612487793, "learning_rate": 2.3855172413793103e-05, "loss": 0.2705, "step": 5769 }, { "epoch": 19.896551724137932, "grad_norm": 0.8942832350730896, "learning_rate": 2.3859310344827588e-05, "loss": 0.2921, "step": 5770 }, { "epoch": 19.9, "grad_norm": 0.9478798508644104, "learning_rate": 2.386344827586207e-05, "loss": 0.2529, "step": 5771 }, { "epoch": 19.90344827586207, "grad_norm": 0.74189293384552, "learning_rate": 2.3867586206896552e-05, "loss": 0.2453, "step": 5772 }, { "epoch": 19.906896551724138, "grad_norm": 0.8032302856445312, "learning_rate": 2.3871724137931034e-05, "loss": 0.236, "step": 5773 }, { "epoch": 19.910344827586208, "grad_norm": 0.9770944118499756, "learning_rate": 2.387586206896552e-05, "loss": 0.2961, "step": 5774 }, { "epoch": 19.913793103448278, "grad_norm": 0.9329320788383484, "learning_rate": 2.3880000000000002e-05, "loss": 0.2732, "step": 5775 }, { "epoch": 19.917241379310344, "grad_norm": 1.8559486865997314, "learning_rate": 2.388413793103448e-05, "loss": 0.2943, "step": 5776 }, { "epoch": 19.920689655172414, "grad_norm": 0.8542605638504028, "learning_rate": 2.3888275862068966e-05, "loss": 0.2481, "step": 5777 }, { "epoch": 19.924137931034483, "grad_norm": 0.8057799935340881, "learning_rate": 2.3892413793103448e-05, "loss": 0.2403, "step": 5778 }, { "epoch": 19.927586206896553, "grad_norm": 1.4158935546875, "learning_rate": 2.3896551724137933e-05, "loss": 0.3001, "step": 5779 }, { "epoch": 19.93103448275862, "grad_norm": 1.0896663665771484, "learning_rate": 2.3900689655172412e-05, "loss": 0.2743, "step": 5780 }, { "epoch": 19.93448275862069, "grad_norm": 1.0795139074325562, "learning_rate": 2.3904827586206897e-05, "loss": 0.2802, "step": 5781 }, { "epoch": 19.93793103448276, "grad_norm": 1.063507318496704, "learning_rate": 2.390896551724138e-05, "loss": 0.2791, "step": 5782 }, { "epoch": 19.94137931034483, "grad_norm": 2.898609161376953, "learning_rate": 2.3913103448275865e-05, "loss": 0.2739, "step": 5783 }, { "epoch": 19.944827586206898, "grad_norm": 3.5590946674346924, "learning_rate": 2.3917241379310343e-05, "loss": 0.2909, "step": 5784 }, { "epoch": 19.948275862068964, "grad_norm": 1.890473484992981, "learning_rate": 2.392137931034483e-05, "loss": 0.3616, "step": 5785 }, { "epoch": 19.951724137931034, "grad_norm": 0.8184043169021606, "learning_rate": 2.392551724137931e-05, "loss": 0.3026, "step": 5786 }, { "epoch": 19.955172413793104, "grad_norm": 0.5767719149589539, "learning_rate": 2.3929655172413796e-05, "loss": 0.2654, "step": 5787 }, { "epoch": 19.958620689655174, "grad_norm": 0.7195724248886108, "learning_rate": 2.3933793103448275e-05, "loss": 0.2984, "step": 5788 }, { "epoch": 19.96206896551724, "grad_norm": 0.7915554642677307, "learning_rate": 2.3937931034482757e-05, "loss": 0.2688, "step": 5789 }, { "epoch": 19.96551724137931, "grad_norm": 0.7510270476341248, "learning_rate": 2.3942068965517242e-05, "loss": 0.3027, "step": 5790 }, { "epoch": 19.96896551724138, "grad_norm": 5.035958290100098, "learning_rate": 2.3946206896551724e-05, "loss": 0.2553, "step": 5791 }, { "epoch": 19.97241379310345, "grad_norm": 1.3462423086166382, "learning_rate": 2.395034482758621e-05, "loss": 0.2758, "step": 5792 }, { "epoch": 19.97586206896552, "grad_norm": 0.9555546641349792, "learning_rate": 2.3954482758620688e-05, "loss": 0.2595, "step": 5793 }, { "epoch": 19.979310344827585, "grad_norm": 0.7147350311279297, "learning_rate": 2.3958620689655174e-05, "loss": 0.2792, "step": 5794 }, { "epoch": 19.982758620689655, "grad_norm": 1.4423117637634277, "learning_rate": 2.3962758620689656e-05, "loss": 0.2681, "step": 5795 }, { "epoch": 19.986206896551725, "grad_norm": 0.9022579789161682, "learning_rate": 2.396689655172414e-05, "loss": 0.2522, "step": 5796 }, { "epoch": 19.989655172413794, "grad_norm": 1.0210108757019043, "learning_rate": 2.397103448275862e-05, "loss": 0.2652, "step": 5797 }, { "epoch": 19.99310344827586, "grad_norm": 1.0926764011383057, "learning_rate": 2.3975172413793105e-05, "loss": 0.2486, "step": 5798 }, { "epoch": 19.99655172413793, "grad_norm": 0.9297946691513062, "learning_rate": 2.3979310344827587e-05, "loss": 0.2826, "step": 5799 }, { "epoch": 20.0, "grad_norm": 1.9224969148635864, "learning_rate": 2.3983448275862073e-05, "loss": 0.3646, "step": 5800 }, { "epoch": 20.00344827586207, "grad_norm": 0.7917041182518005, "learning_rate": 2.398758620689655e-05, "loss": 0.2881, "step": 5801 }, { "epoch": 20.00689655172414, "grad_norm": 0.788443922996521, "learning_rate": 2.3991724137931033e-05, "loss": 0.2693, "step": 5802 }, { "epoch": 20.010344827586206, "grad_norm": 1.0840611457824707, "learning_rate": 2.399586206896552e-05, "loss": 0.2744, "step": 5803 }, { "epoch": 20.013793103448275, "grad_norm": 0.7270169854164124, "learning_rate": 2.4e-05, "loss": 0.2649, "step": 5804 }, { "epoch": 20.017241379310345, "grad_norm": 0.8352688550949097, "learning_rate": 2.4004137931034483e-05, "loss": 0.2626, "step": 5805 }, { "epoch": 20.020689655172415, "grad_norm": 0.9949734210968018, "learning_rate": 2.4008275862068965e-05, "loss": 0.2718, "step": 5806 }, { "epoch": 20.02413793103448, "grad_norm": 0.7692394256591797, "learning_rate": 2.401241379310345e-05, "loss": 0.2547, "step": 5807 }, { "epoch": 20.02758620689655, "grad_norm": 1.2537541389465332, "learning_rate": 2.4016551724137932e-05, "loss": 0.3294, "step": 5808 }, { "epoch": 20.03103448275862, "grad_norm": 1.8199892044067383, "learning_rate": 2.4020689655172414e-05, "loss": 0.2532, "step": 5809 }, { "epoch": 20.03448275862069, "grad_norm": 1.3625386953353882, "learning_rate": 2.4024827586206896e-05, "loss": 0.2714, "step": 5810 }, { "epoch": 20.03793103448276, "grad_norm": 0.8970034122467041, "learning_rate": 2.402896551724138e-05, "loss": 0.254, "step": 5811 }, { "epoch": 20.041379310344826, "grad_norm": 2.0023722648620605, "learning_rate": 2.4033103448275864e-05, "loss": 0.2754, "step": 5812 }, { "epoch": 20.044827586206896, "grad_norm": 1.215222716331482, "learning_rate": 2.4037241379310346e-05, "loss": 0.2726, "step": 5813 }, { "epoch": 20.048275862068966, "grad_norm": 1.4912782907485962, "learning_rate": 2.4041379310344828e-05, "loss": 0.2925, "step": 5814 }, { "epoch": 20.051724137931036, "grad_norm": 1.3074016571044922, "learning_rate": 2.404551724137931e-05, "loss": 0.2871, "step": 5815 }, { "epoch": 20.055172413793102, "grad_norm": 1.5918124914169312, "learning_rate": 2.4049655172413795e-05, "loss": 0.242, "step": 5816 }, { "epoch": 20.05862068965517, "grad_norm": 1.8915060758590698, "learning_rate": 2.4053793103448274e-05, "loss": 0.2612, "step": 5817 }, { "epoch": 20.06206896551724, "grad_norm": 1.147998332977295, "learning_rate": 2.405793103448276e-05, "loss": 0.2753, "step": 5818 }, { "epoch": 20.06551724137931, "grad_norm": 1.1225167512893677, "learning_rate": 2.406206896551724e-05, "loss": 0.2866, "step": 5819 }, { "epoch": 20.06896551724138, "grad_norm": 0.9758226275444031, "learning_rate": 2.4066206896551726e-05, "loss": 0.2709, "step": 5820 }, { "epoch": 20.072413793103447, "grad_norm": 3.095867395401001, "learning_rate": 2.4070344827586205e-05, "loss": 0.3245, "step": 5821 }, { "epoch": 20.075862068965517, "grad_norm": 1.9769436120986938, "learning_rate": 2.407448275862069e-05, "loss": 0.2675, "step": 5822 }, { "epoch": 20.079310344827586, "grad_norm": 1.8225014209747314, "learning_rate": 2.4078620689655173e-05, "loss": 0.257, "step": 5823 }, { "epoch": 20.082758620689656, "grad_norm": 4.223128318786621, "learning_rate": 2.4082758620689658e-05, "loss": 0.3042, "step": 5824 }, { "epoch": 20.086206896551722, "grad_norm": 2.2377800941467285, "learning_rate": 2.408689655172414e-05, "loss": 0.4168, "step": 5825 }, { "epoch": 20.089655172413792, "grad_norm": 0.9796725511550903, "learning_rate": 2.4091034482758622e-05, "loss": 0.3551, "step": 5826 }, { "epoch": 20.093103448275862, "grad_norm": 1.3015483617782593, "learning_rate": 2.4095172413793104e-05, "loss": 0.2792, "step": 5827 }, { "epoch": 20.09655172413793, "grad_norm": 0.7703789472579956, "learning_rate": 2.4099310344827586e-05, "loss": 0.2872, "step": 5828 }, { "epoch": 20.1, "grad_norm": 1.0663080215454102, "learning_rate": 2.410344827586207e-05, "loss": 0.2813, "step": 5829 }, { "epoch": 20.103448275862068, "grad_norm": 0.9200856685638428, "learning_rate": 2.410758620689655e-05, "loss": 0.2986, "step": 5830 }, { "epoch": 20.106896551724137, "grad_norm": 0.9310913681983948, "learning_rate": 2.4111724137931035e-05, "loss": 0.2971, "step": 5831 }, { "epoch": 20.110344827586207, "grad_norm": 0.9751786589622498, "learning_rate": 2.4115862068965517e-05, "loss": 0.3038, "step": 5832 }, { "epoch": 20.113793103448277, "grad_norm": 1.1064910888671875, "learning_rate": 2.4120000000000003e-05, "loss": 0.2562, "step": 5833 }, { "epoch": 20.117241379310343, "grad_norm": 0.783814549446106, "learning_rate": 2.412413793103448e-05, "loss": 0.2417, "step": 5834 }, { "epoch": 20.120689655172413, "grad_norm": 0.6676844954490662, "learning_rate": 2.4128275862068967e-05, "loss": 0.2888, "step": 5835 }, { "epoch": 20.124137931034483, "grad_norm": 0.6119788289070129, "learning_rate": 2.413241379310345e-05, "loss": 0.2709, "step": 5836 }, { "epoch": 20.127586206896552, "grad_norm": 0.6763014793395996, "learning_rate": 2.4136551724137934e-05, "loss": 0.2825, "step": 5837 }, { "epoch": 20.131034482758622, "grad_norm": 0.7812873721122742, "learning_rate": 2.4140689655172413e-05, "loss": 0.2467, "step": 5838 }, { "epoch": 20.13448275862069, "grad_norm": 1.4124560356140137, "learning_rate": 2.41448275862069e-05, "loss": 0.2892, "step": 5839 }, { "epoch": 20.137931034482758, "grad_norm": 0.8989611268043518, "learning_rate": 2.414896551724138e-05, "loss": 0.2296, "step": 5840 }, { "epoch": 20.141379310344828, "grad_norm": 0.715519905090332, "learning_rate": 2.4153103448275862e-05, "loss": 0.2621, "step": 5841 }, { "epoch": 20.144827586206898, "grad_norm": 1.0450280904769897, "learning_rate": 2.4157241379310344e-05, "loss": 0.2277, "step": 5842 }, { "epoch": 20.148275862068967, "grad_norm": 0.7791480422019958, "learning_rate": 2.4161379310344826e-05, "loss": 0.2487, "step": 5843 }, { "epoch": 20.151724137931033, "grad_norm": 0.8124345541000366, "learning_rate": 2.4165517241379312e-05, "loss": 0.2324, "step": 5844 }, { "epoch": 20.155172413793103, "grad_norm": 0.8643386960029602, "learning_rate": 2.4169655172413794e-05, "loss": 0.2538, "step": 5845 }, { "epoch": 20.158620689655173, "grad_norm": 1.0765928030014038, "learning_rate": 2.4173793103448276e-05, "loss": 0.2559, "step": 5846 }, { "epoch": 20.162068965517243, "grad_norm": 0.8519605994224548, "learning_rate": 2.4177931034482758e-05, "loss": 0.25, "step": 5847 }, { "epoch": 20.16551724137931, "grad_norm": 2.2511255741119385, "learning_rate": 2.4182068965517243e-05, "loss": 0.2902, "step": 5848 }, { "epoch": 20.16896551724138, "grad_norm": 1.4017654657363892, "learning_rate": 2.4186206896551725e-05, "loss": 0.3474, "step": 5849 }, { "epoch": 20.17241379310345, "grad_norm": 1.38009774684906, "learning_rate": 2.4190344827586207e-05, "loss": 0.4398, "step": 5850 }, { "epoch": 20.175862068965518, "grad_norm": 0.9847621321678162, "learning_rate": 2.419448275862069e-05, "loss": 0.3409, "step": 5851 }, { "epoch": 20.179310344827588, "grad_norm": 0.7336075901985168, "learning_rate": 2.4198620689655175e-05, "loss": 0.2784, "step": 5852 }, { "epoch": 20.182758620689654, "grad_norm": 0.7793371677398682, "learning_rate": 2.4202758620689657e-05, "loss": 0.3221, "step": 5853 }, { "epoch": 20.186206896551724, "grad_norm": 0.9354973435401917, "learning_rate": 2.4206896551724135e-05, "loss": 0.2811, "step": 5854 }, { "epoch": 20.189655172413794, "grad_norm": 1.0936801433563232, "learning_rate": 2.421103448275862e-05, "loss": 0.286, "step": 5855 }, { "epoch": 20.193103448275863, "grad_norm": 0.7911811470985413, "learning_rate": 2.4215172413793103e-05, "loss": 0.2346, "step": 5856 }, { "epoch": 20.19655172413793, "grad_norm": 0.7170079350471497, "learning_rate": 2.4219310344827588e-05, "loss": 0.2559, "step": 5857 }, { "epoch": 20.2, "grad_norm": 0.9508832097053528, "learning_rate": 2.422344827586207e-05, "loss": 0.2758, "step": 5858 }, { "epoch": 20.20344827586207, "grad_norm": 0.7671774625778198, "learning_rate": 2.4227586206896552e-05, "loss": 0.2487, "step": 5859 }, { "epoch": 20.20689655172414, "grad_norm": 1.075263261795044, "learning_rate": 2.4231724137931034e-05, "loss": 0.2746, "step": 5860 }, { "epoch": 20.21034482758621, "grad_norm": 1.8318336009979248, "learning_rate": 2.423586206896552e-05, "loss": 0.2732, "step": 5861 }, { "epoch": 20.213793103448275, "grad_norm": 2.102789878845215, "learning_rate": 2.4240000000000002e-05, "loss": 0.3116, "step": 5862 }, { "epoch": 20.217241379310344, "grad_norm": 0.9776105284690857, "learning_rate": 2.4244137931034484e-05, "loss": 0.2745, "step": 5863 }, { "epoch": 20.220689655172414, "grad_norm": 1.2142058610916138, "learning_rate": 2.4248275862068966e-05, "loss": 0.2794, "step": 5864 }, { "epoch": 20.224137931034484, "grad_norm": 1.2659292221069336, "learning_rate": 2.425241379310345e-05, "loss": 0.2515, "step": 5865 }, { "epoch": 20.22758620689655, "grad_norm": 0.9897915124893188, "learning_rate": 2.4256551724137933e-05, "loss": 0.257, "step": 5866 }, { "epoch": 20.23103448275862, "grad_norm": 0.9847066402435303, "learning_rate": 2.4260689655172412e-05, "loss": 0.2787, "step": 5867 }, { "epoch": 20.23448275862069, "grad_norm": 0.9508948922157288, "learning_rate": 2.4264827586206897e-05, "loss": 0.2531, "step": 5868 }, { "epoch": 20.23793103448276, "grad_norm": 7.730885028839111, "learning_rate": 2.426896551724138e-05, "loss": 0.2969, "step": 5869 }, { "epoch": 20.24137931034483, "grad_norm": 1.2116127014160156, "learning_rate": 2.4273103448275865e-05, "loss": 0.2432, "step": 5870 }, { "epoch": 20.244827586206895, "grad_norm": 1.3483628034591675, "learning_rate": 2.4277241379310343e-05, "loss": 0.2538, "step": 5871 }, { "epoch": 20.248275862068965, "grad_norm": 1.244947075843811, "learning_rate": 2.428137931034483e-05, "loss": 0.271, "step": 5872 }, { "epoch": 20.251724137931035, "grad_norm": 1.251749038696289, "learning_rate": 2.428551724137931e-05, "loss": 0.2922, "step": 5873 }, { "epoch": 20.255172413793105, "grad_norm": 1.3552366495132446, "learning_rate": 2.4289655172413796e-05, "loss": 0.2671, "step": 5874 }, { "epoch": 20.25862068965517, "grad_norm": 3.0805492401123047, "learning_rate": 2.4293793103448275e-05, "loss": 0.3952, "step": 5875 }, { "epoch": 20.26206896551724, "grad_norm": 1.2240508794784546, "learning_rate": 2.429793103448276e-05, "loss": 0.3411, "step": 5876 }, { "epoch": 20.26551724137931, "grad_norm": 0.9852182269096375, "learning_rate": 2.4302068965517242e-05, "loss": 0.2999, "step": 5877 }, { "epoch": 20.26896551724138, "grad_norm": 2.0852456092834473, "learning_rate": 2.4306206896551728e-05, "loss": 0.3228, "step": 5878 }, { "epoch": 20.27241379310345, "grad_norm": 1.0047844648361206, "learning_rate": 2.4310344827586206e-05, "loss": 0.2665, "step": 5879 }, { "epoch": 20.275862068965516, "grad_norm": 0.9182157516479492, "learning_rate": 2.4314482758620688e-05, "loss": 0.2927, "step": 5880 }, { "epoch": 20.279310344827586, "grad_norm": 0.9597567319869995, "learning_rate": 2.4318620689655174e-05, "loss": 0.266, "step": 5881 }, { "epoch": 20.282758620689656, "grad_norm": 1.0706771612167358, "learning_rate": 2.4322758620689656e-05, "loss": 0.2864, "step": 5882 }, { "epoch": 20.286206896551725, "grad_norm": 0.811588704586029, "learning_rate": 2.4326896551724138e-05, "loss": 0.2971, "step": 5883 }, { "epoch": 20.28965517241379, "grad_norm": 0.8285446166992188, "learning_rate": 2.433103448275862e-05, "loss": 0.2562, "step": 5884 }, { "epoch": 20.29310344827586, "grad_norm": 1.563591480255127, "learning_rate": 2.4335172413793105e-05, "loss": 0.2711, "step": 5885 }, { "epoch": 20.29655172413793, "grad_norm": 1.977132797241211, "learning_rate": 2.4339310344827587e-05, "loss": 0.2917, "step": 5886 }, { "epoch": 20.3, "grad_norm": 0.7375214099884033, "learning_rate": 2.434344827586207e-05, "loss": 0.2836, "step": 5887 }, { "epoch": 20.30344827586207, "grad_norm": 1.3715708255767822, "learning_rate": 2.434758620689655e-05, "loss": 0.2701, "step": 5888 }, { "epoch": 20.306896551724137, "grad_norm": 2.7224485874176025, "learning_rate": 2.4351724137931037e-05, "loss": 0.2631, "step": 5889 }, { "epoch": 20.310344827586206, "grad_norm": 1.4212831258773804, "learning_rate": 2.435586206896552e-05, "loss": 0.2626, "step": 5890 }, { "epoch": 20.313793103448276, "grad_norm": 0.91219562292099, "learning_rate": 2.4360000000000004e-05, "loss": 0.2799, "step": 5891 }, { "epoch": 20.317241379310346, "grad_norm": 2.075298309326172, "learning_rate": 2.4364137931034483e-05, "loss": 0.2634, "step": 5892 }, { "epoch": 20.320689655172412, "grad_norm": 0.9863964915275574, "learning_rate": 2.4368275862068965e-05, "loss": 0.2515, "step": 5893 }, { "epoch": 20.324137931034482, "grad_norm": 1.0888454914093018, "learning_rate": 2.437241379310345e-05, "loss": 0.2593, "step": 5894 }, { "epoch": 20.32758620689655, "grad_norm": 1.5388888120651245, "learning_rate": 2.4376551724137932e-05, "loss": 0.2496, "step": 5895 }, { "epoch": 20.33103448275862, "grad_norm": 1.520124912261963, "learning_rate": 2.4380689655172414e-05, "loss": 0.2503, "step": 5896 }, { "epoch": 20.33448275862069, "grad_norm": 3.5187296867370605, "learning_rate": 2.4384827586206896e-05, "loss": 0.2412, "step": 5897 }, { "epoch": 20.337931034482757, "grad_norm": 1.1050294637680054, "learning_rate": 2.438896551724138e-05, "loss": 0.2716, "step": 5898 }, { "epoch": 20.341379310344827, "grad_norm": 1.45724618434906, "learning_rate": 2.4393103448275863e-05, "loss": 0.2989, "step": 5899 }, { "epoch": 20.344827586206897, "grad_norm": 1.557517409324646, "learning_rate": 2.4397241379310345e-05, "loss": 0.3554, "step": 5900 }, { "epoch": 20.348275862068967, "grad_norm": 0.9181574583053589, "learning_rate": 2.4401379310344828e-05, "loss": 0.3046, "step": 5901 }, { "epoch": 20.351724137931033, "grad_norm": 0.676926851272583, "learning_rate": 2.4405517241379313e-05, "loss": 0.3219, "step": 5902 }, { "epoch": 20.355172413793102, "grad_norm": 0.8700073957443237, "learning_rate": 2.4409655172413795e-05, "loss": 0.2696, "step": 5903 }, { "epoch": 20.358620689655172, "grad_norm": 1.365107536315918, "learning_rate": 2.4413793103448277e-05, "loss": 0.2651, "step": 5904 }, { "epoch": 20.362068965517242, "grad_norm": 0.8827432990074158, "learning_rate": 2.441793103448276e-05, "loss": 0.2888, "step": 5905 }, { "epoch": 20.36551724137931, "grad_norm": 1.0172805786132812, "learning_rate": 2.442206896551724e-05, "loss": 0.2722, "step": 5906 }, { "epoch": 20.368965517241378, "grad_norm": 1.69404137134552, "learning_rate": 2.4426206896551726e-05, "loss": 0.2949, "step": 5907 }, { "epoch": 20.372413793103448, "grad_norm": 1.4598908424377441, "learning_rate": 2.4430344827586205e-05, "loss": 0.2985, "step": 5908 }, { "epoch": 20.375862068965517, "grad_norm": 0.7266607880592346, "learning_rate": 2.443448275862069e-05, "loss": 0.2663, "step": 5909 }, { "epoch": 20.379310344827587, "grad_norm": 1.2515987157821655, "learning_rate": 2.4438620689655172e-05, "loss": 0.2774, "step": 5910 }, { "epoch": 20.382758620689657, "grad_norm": 6.998562812805176, "learning_rate": 2.4442758620689658e-05, "loss": 0.2699, "step": 5911 }, { "epoch": 20.386206896551723, "grad_norm": 2.4055063724517822, "learning_rate": 2.4446896551724136e-05, "loss": 0.2738, "step": 5912 }, { "epoch": 20.389655172413793, "grad_norm": 0.8666790127754211, "learning_rate": 2.4451034482758622e-05, "loss": 0.2502, "step": 5913 }, { "epoch": 20.393103448275863, "grad_norm": 1.085526466369629, "learning_rate": 2.4455172413793104e-05, "loss": 0.2759, "step": 5914 }, { "epoch": 20.396551724137932, "grad_norm": 0.9692733883857727, "learning_rate": 2.445931034482759e-05, "loss": 0.2855, "step": 5915 }, { "epoch": 20.4, "grad_norm": 0.9223213195800781, "learning_rate": 2.4463448275862068e-05, "loss": 0.2974, "step": 5916 }, { "epoch": 20.40344827586207, "grad_norm": 1.2381278276443481, "learning_rate": 2.4467586206896553e-05, "loss": 0.2878, "step": 5917 }, { "epoch": 20.406896551724138, "grad_norm": 0.7965761423110962, "learning_rate": 2.4471724137931035e-05, "loss": 0.2792, "step": 5918 }, { "epoch": 20.410344827586208, "grad_norm": 1.1809200048446655, "learning_rate": 2.4475862068965517e-05, "loss": 0.2679, "step": 5919 }, { "epoch": 20.413793103448278, "grad_norm": 2.2895750999450684, "learning_rate": 2.448e-05, "loss": 0.2998, "step": 5920 }, { "epoch": 20.417241379310344, "grad_norm": 1.7564152479171753, "learning_rate": 2.448413793103448e-05, "loss": 0.2896, "step": 5921 }, { "epoch": 20.420689655172414, "grad_norm": 2.967428207397461, "learning_rate": 2.4488275862068967e-05, "loss": 0.2764, "step": 5922 }, { "epoch": 20.424137931034483, "grad_norm": 1.4042187929153442, "learning_rate": 2.449241379310345e-05, "loss": 0.2705, "step": 5923 }, { "epoch": 20.427586206896553, "grad_norm": 1.3730937242507935, "learning_rate": 2.4496551724137934e-05, "loss": 0.3213, "step": 5924 }, { "epoch": 20.43103448275862, "grad_norm": 1.8463270664215088, "learning_rate": 2.4500689655172413e-05, "loss": 0.3461, "step": 5925 }, { "epoch": 20.43448275862069, "grad_norm": 1.2175743579864502, "learning_rate": 2.4504827586206898e-05, "loss": 0.3445, "step": 5926 }, { "epoch": 20.43793103448276, "grad_norm": 1.1536431312561035, "learning_rate": 2.450896551724138e-05, "loss": 0.3168, "step": 5927 }, { "epoch": 20.44137931034483, "grad_norm": 0.7370527982711792, "learning_rate": 2.4513103448275866e-05, "loss": 0.3291, "step": 5928 }, { "epoch": 20.444827586206898, "grad_norm": 0.7831260561943054, "learning_rate": 2.4517241379310344e-05, "loss": 0.309, "step": 5929 }, { "epoch": 20.448275862068964, "grad_norm": 1.6269410848617554, "learning_rate": 2.4521379310344826e-05, "loss": 0.2487, "step": 5930 }, { "epoch": 20.451724137931034, "grad_norm": 0.9595816731452942, "learning_rate": 2.4525517241379312e-05, "loss": 0.3219, "step": 5931 }, { "epoch": 20.455172413793104, "grad_norm": 0.7226781249046326, "learning_rate": 2.4529655172413794e-05, "loss": 0.2657, "step": 5932 }, { "epoch": 20.458620689655174, "grad_norm": 10.374129295349121, "learning_rate": 2.4533793103448276e-05, "loss": 0.2639, "step": 5933 }, { "epoch": 20.46206896551724, "grad_norm": 1.88595712184906, "learning_rate": 2.4537931034482758e-05, "loss": 0.2957, "step": 5934 }, { "epoch": 20.46551724137931, "grad_norm": 1.0473605394363403, "learning_rate": 2.4542068965517243e-05, "loss": 0.269, "step": 5935 }, { "epoch": 20.46896551724138, "grad_norm": 0.7033248543739319, "learning_rate": 2.4546206896551725e-05, "loss": 0.2822, "step": 5936 }, { "epoch": 20.47241379310345, "grad_norm": 1.805145025253296, "learning_rate": 2.4550344827586207e-05, "loss": 0.267, "step": 5937 }, { "epoch": 20.47586206896552, "grad_norm": 0.9839615821838379, "learning_rate": 2.455448275862069e-05, "loss": 0.269, "step": 5938 }, { "epoch": 20.479310344827585, "grad_norm": 0.8586481213569641, "learning_rate": 2.4558620689655175e-05, "loss": 0.2348, "step": 5939 }, { "epoch": 20.482758620689655, "grad_norm": 1.8820284605026245, "learning_rate": 2.4562758620689657e-05, "loss": 0.2876, "step": 5940 }, { "epoch": 20.486206896551725, "grad_norm": 1.1807365417480469, "learning_rate": 2.456689655172414e-05, "loss": 0.2433, "step": 5941 }, { "epoch": 20.489655172413794, "grad_norm": 1.0033482313156128, "learning_rate": 2.457103448275862e-05, "loss": 0.2591, "step": 5942 }, { "epoch": 20.49310344827586, "grad_norm": 2.94455885887146, "learning_rate": 2.4575172413793103e-05, "loss": 0.3027, "step": 5943 }, { "epoch": 20.49655172413793, "grad_norm": 1.0557867288589478, "learning_rate": 2.4579310344827588e-05, "loss": 0.2827, "step": 5944 }, { "epoch": 20.5, "grad_norm": 1.7514270544052124, "learning_rate": 2.4583448275862067e-05, "loss": 0.3235, "step": 5945 }, { "epoch": 20.50344827586207, "grad_norm": 2.678234100341797, "learning_rate": 2.4587586206896552e-05, "loss": 0.2692, "step": 5946 }, { "epoch": 20.50689655172414, "grad_norm": 1.0714789628982544, "learning_rate": 2.4591724137931034e-05, "loss": 0.2443, "step": 5947 }, { "epoch": 20.510344827586206, "grad_norm": 1.6682257652282715, "learning_rate": 2.459586206896552e-05, "loss": 0.2475, "step": 5948 }, { "epoch": 20.513793103448275, "grad_norm": 2.9989120960235596, "learning_rate": 2.4599999999999998e-05, "loss": 0.2937, "step": 5949 }, { "epoch": 20.517241379310345, "grad_norm": 1.7717182636260986, "learning_rate": 2.4604137931034484e-05, "loss": 0.4031, "step": 5950 }, { "epoch": 20.520689655172415, "grad_norm": 0.6236950159072876, "learning_rate": 2.4608275862068966e-05, "loss": 0.328, "step": 5951 }, { "epoch": 20.52413793103448, "grad_norm": 0.5639208555221558, "learning_rate": 2.461241379310345e-05, "loss": 0.2981, "step": 5952 }, { "epoch": 20.52758620689655, "grad_norm": 0.7235707640647888, "learning_rate": 2.461655172413793e-05, "loss": 0.2695, "step": 5953 }, { "epoch": 20.53103448275862, "grad_norm": 3.100231885910034, "learning_rate": 2.4620689655172415e-05, "loss": 0.2818, "step": 5954 }, { "epoch": 20.53448275862069, "grad_norm": 0.669791042804718, "learning_rate": 2.4624827586206897e-05, "loss": 0.2636, "step": 5955 }, { "epoch": 20.53793103448276, "grad_norm": 0.8928493857383728, "learning_rate": 2.462896551724138e-05, "loss": 0.2655, "step": 5956 }, { "epoch": 20.541379310344826, "grad_norm": 1.1682881116867065, "learning_rate": 2.4633103448275865e-05, "loss": 0.2874, "step": 5957 }, { "epoch": 20.544827586206896, "grad_norm": 0.7010715007781982, "learning_rate": 2.4637241379310343e-05, "loss": 0.2728, "step": 5958 }, { "epoch": 20.548275862068966, "grad_norm": 1.1176410913467407, "learning_rate": 2.464137931034483e-05, "loss": 0.2686, "step": 5959 }, { "epoch": 20.551724137931036, "grad_norm": 0.9839648008346558, "learning_rate": 2.464551724137931e-05, "loss": 0.2486, "step": 5960 }, { "epoch": 20.555172413793102, "grad_norm": 0.862452507019043, "learning_rate": 2.4649655172413796e-05, "loss": 0.2732, "step": 5961 }, { "epoch": 20.55862068965517, "grad_norm": 0.9027678370475769, "learning_rate": 2.4653793103448275e-05, "loss": 0.2429, "step": 5962 }, { "epoch": 20.56206896551724, "grad_norm": 1.58556067943573, "learning_rate": 2.465793103448276e-05, "loss": 0.2845, "step": 5963 }, { "epoch": 20.56551724137931, "grad_norm": 1.4818799495697021, "learning_rate": 2.4662068965517242e-05, "loss": 0.2699, "step": 5964 }, { "epoch": 20.56896551724138, "grad_norm": 1.3736668825149536, "learning_rate": 2.4666206896551727e-05, "loss": 0.2489, "step": 5965 }, { "epoch": 20.572413793103447, "grad_norm": 0.9540401101112366, "learning_rate": 2.4670344827586206e-05, "loss": 0.2698, "step": 5966 }, { "epoch": 20.575862068965517, "grad_norm": 1.2344744205474854, "learning_rate": 2.467448275862069e-05, "loss": 0.2529, "step": 5967 }, { "epoch": 20.579310344827586, "grad_norm": 0.6163141131401062, "learning_rate": 2.4678620689655174e-05, "loss": 0.2711, "step": 5968 }, { "epoch": 20.582758620689656, "grad_norm": 0.892461895942688, "learning_rate": 2.4682758620689656e-05, "loss": 0.2424, "step": 5969 }, { "epoch": 20.586206896551722, "grad_norm": 1.5131131410598755, "learning_rate": 2.4686896551724138e-05, "loss": 0.2665, "step": 5970 }, { "epoch": 20.589655172413792, "grad_norm": 0.9195484519004822, "learning_rate": 2.469103448275862e-05, "loss": 0.2656, "step": 5971 }, { "epoch": 20.593103448275862, "grad_norm": 0.9595357775688171, "learning_rate": 2.4695172413793105e-05, "loss": 0.2403, "step": 5972 }, { "epoch": 20.59655172413793, "grad_norm": 1.4007562398910522, "learning_rate": 2.4699310344827587e-05, "loss": 0.2843, "step": 5973 }, { "epoch": 20.6, "grad_norm": 2.3804116249084473, "learning_rate": 2.470344827586207e-05, "loss": 0.3116, "step": 5974 }, { "epoch": 20.603448275862068, "grad_norm": 1.6443979740142822, "learning_rate": 2.470758620689655e-05, "loss": 0.4335, "step": 5975 }, { "epoch": 20.606896551724137, "grad_norm": 0.8944075703620911, "learning_rate": 2.4711724137931036e-05, "loss": 0.3585, "step": 5976 }, { "epoch": 20.610344827586207, "grad_norm": 0.7867250442504883, "learning_rate": 2.471586206896552e-05, "loss": 0.2482, "step": 5977 }, { "epoch": 20.613793103448277, "grad_norm": 0.7025296688079834, "learning_rate": 2.472e-05, "loss": 0.2953, "step": 5978 }, { "epoch": 20.617241379310343, "grad_norm": 0.6496264338493347, "learning_rate": 2.4724137931034483e-05, "loss": 0.2572, "step": 5979 }, { "epoch": 20.620689655172413, "grad_norm": 0.9209073781967163, "learning_rate": 2.4728275862068968e-05, "loss": 0.3032, "step": 5980 }, { "epoch": 20.624137931034483, "grad_norm": 0.5832102298736572, "learning_rate": 2.473241379310345e-05, "loss": 0.278, "step": 5981 }, { "epoch": 20.627586206896552, "grad_norm": 1.2298474311828613, "learning_rate": 2.473655172413793e-05, "loss": 0.3071, "step": 5982 }, { "epoch": 20.631034482758622, "grad_norm": 1.1095489263534546, "learning_rate": 2.4740689655172414e-05, "loss": 0.2805, "step": 5983 }, { "epoch": 20.63448275862069, "grad_norm": 0.7892383933067322, "learning_rate": 2.4744827586206896e-05, "loss": 0.272, "step": 5984 }, { "epoch": 20.637931034482758, "grad_norm": 1.3562664985656738, "learning_rate": 2.474896551724138e-05, "loss": 0.2406, "step": 5985 }, { "epoch": 20.641379310344828, "grad_norm": 1.5948188304901123, "learning_rate": 2.475310344827586e-05, "loss": 0.2735, "step": 5986 }, { "epoch": 20.644827586206898, "grad_norm": 0.7801092863082886, "learning_rate": 2.4757241379310345e-05, "loss": 0.2977, "step": 5987 }, { "epoch": 20.648275862068964, "grad_norm": 1.6232225894927979, "learning_rate": 2.4761379310344827e-05, "loss": 0.2895, "step": 5988 }, { "epoch": 20.651724137931033, "grad_norm": 1.0682679414749146, "learning_rate": 2.4765517241379313e-05, "loss": 0.2629, "step": 5989 }, { "epoch": 20.655172413793103, "grad_norm": 1.193141222000122, "learning_rate": 2.476965517241379e-05, "loss": 0.2676, "step": 5990 }, { "epoch": 20.658620689655173, "grad_norm": 1.9155596494674683, "learning_rate": 2.4773793103448277e-05, "loss": 0.2503, "step": 5991 }, { "epoch": 20.662068965517243, "grad_norm": 1.8464441299438477, "learning_rate": 2.477793103448276e-05, "loss": 0.2573, "step": 5992 }, { "epoch": 20.66551724137931, "grad_norm": 1.0713253021240234, "learning_rate": 2.4782068965517244e-05, "loss": 0.262, "step": 5993 }, { "epoch": 20.66896551724138, "grad_norm": 0.9467431902885437, "learning_rate": 2.4786206896551726e-05, "loss": 0.2426, "step": 5994 }, { "epoch": 20.67241379310345, "grad_norm": 1.4343286752700806, "learning_rate": 2.4790344827586205e-05, "loss": 0.2413, "step": 5995 }, { "epoch": 20.675862068965518, "grad_norm": 1.5141414403915405, "learning_rate": 2.479448275862069e-05, "loss": 0.2799, "step": 5996 }, { "epoch": 20.679310344827588, "grad_norm": 1.7601078748703003, "learning_rate": 2.4798620689655172e-05, "loss": 0.2386, "step": 5997 }, { "epoch": 20.682758620689654, "grad_norm": 1.0491293668746948, "learning_rate": 2.4802758620689658e-05, "loss": 0.2991, "step": 5998 }, { "epoch": 20.686206896551724, "grad_norm": 1.8204455375671387, "learning_rate": 2.4806896551724136e-05, "loss": 0.3374, "step": 5999 }, { "epoch": 20.689655172413794, "grad_norm": 1.4962587356567383, "learning_rate": 2.4811034482758622e-05, "loss": 0.4161, "step": 6000 }, { "epoch": 20.689655172413794, "eval_cer": 0.13598288615685972, "eval_loss": 0.31916168332099915, "eval_runtime": 18.3206, "eval_samples_per_second": 50.599, "eval_steps_per_second": 0.164, "eval_wer": 0.32259316770186336, "step": 6000 }, { "epoch": 20.693103448275863, "grad_norm": 0.8281209468841553, "learning_rate": 2.4815172413793104e-05, "loss": 0.3399, "step": 6001 }, { "epoch": 20.69655172413793, "grad_norm": 0.9733096957206726, "learning_rate": 2.481931034482759e-05, "loss": 0.2939, "step": 6002 }, { "epoch": 20.7, "grad_norm": 0.6726796627044678, "learning_rate": 2.4823448275862068e-05, "loss": 0.3136, "step": 6003 }, { "epoch": 20.70344827586207, "grad_norm": 1.5972752571105957, "learning_rate": 2.4827586206896553e-05, "loss": 0.2756, "step": 6004 }, { "epoch": 20.70689655172414, "grad_norm": 0.7714618444442749, "learning_rate": 2.4831724137931035e-05, "loss": 0.3027, "step": 6005 }, { "epoch": 20.71034482758621, "grad_norm": 0.9295017123222351, "learning_rate": 2.483586206896552e-05, "loss": 0.269, "step": 6006 }, { "epoch": 20.713793103448275, "grad_norm": 1.0633471012115479, "learning_rate": 2.484e-05, "loss": 0.2633, "step": 6007 }, { "epoch": 20.717241379310344, "grad_norm": 1.0823094844818115, "learning_rate": 2.484413793103448e-05, "loss": 0.2721, "step": 6008 }, { "epoch": 20.720689655172414, "grad_norm": 0.8046032190322876, "learning_rate": 2.4848275862068967e-05, "loss": 0.2599, "step": 6009 }, { "epoch": 20.724137931034484, "grad_norm": 0.8341232538223267, "learning_rate": 2.485241379310345e-05, "loss": 0.2739, "step": 6010 }, { "epoch": 20.72758620689655, "grad_norm": 0.6921012997627258, "learning_rate": 2.485655172413793e-05, "loss": 0.2687, "step": 6011 }, { "epoch": 20.73103448275862, "grad_norm": 0.8554514646530151, "learning_rate": 2.4860689655172413e-05, "loss": 0.2643, "step": 6012 }, { "epoch": 20.73448275862069, "grad_norm": 0.750910222530365, "learning_rate": 2.4864827586206898e-05, "loss": 0.2656, "step": 6013 }, { "epoch": 20.73793103448276, "grad_norm": 0.8181694746017456, "learning_rate": 2.486896551724138e-05, "loss": 0.2583, "step": 6014 }, { "epoch": 20.74137931034483, "grad_norm": 1.2440348863601685, "learning_rate": 2.4873103448275862e-05, "loss": 0.2592, "step": 6015 }, { "epoch": 20.744827586206895, "grad_norm": 0.9384556412696838, "learning_rate": 2.4877241379310344e-05, "loss": 0.2524, "step": 6016 }, { "epoch": 20.748275862068965, "grad_norm": 0.7644951939582825, "learning_rate": 2.488137931034483e-05, "loss": 0.2537, "step": 6017 }, { "epoch": 20.751724137931035, "grad_norm": 1.3285651206970215, "learning_rate": 2.4885517241379312e-05, "loss": 0.2788, "step": 6018 }, { "epoch": 20.755172413793105, "grad_norm": 0.8091766834259033, "learning_rate": 2.4889655172413794e-05, "loss": 0.2648, "step": 6019 }, { "epoch": 20.75862068965517, "grad_norm": 1.0133153200149536, "learning_rate": 2.4893793103448276e-05, "loss": 0.2594, "step": 6020 }, { "epoch": 20.76206896551724, "grad_norm": 0.93911212682724, "learning_rate": 2.4897931034482758e-05, "loss": 0.2796, "step": 6021 }, { "epoch": 20.76551724137931, "grad_norm": 0.9372729063034058, "learning_rate": 2.4902068965517243e-05, "loss": 0.2671, "step": 6022 }, { "epoch": 20.76896551724138, "grad_norm": 0.8382241129875183, "learning_rate": 2.4906206896551722e-05, "loss": 0.291, "step": 6023 }, { "epoch": 20.77241379310345, "grad_norm": 1.171263575553894, "learning_rate": 2.4910344827586207e-05, "loss": 0.271, "step": 6024 }, { "epoch": 20.775862068965516, "grad_norm": 3.119178295135498, "learning_rate": 2.491448275862069e-05, "loss": 0.4134, "step": 6025 }, { "epoch": 20.779310344827586, "grad_norm": 0.733735203742981, "learning_rate": 2.4918620689655175e-05, "loss": 0.3259, "step": 6026 }, { "epoch": 20.782758620689656, "grad_norm": 1.9380825757980347, "learning_rate": 2.4922758620689657e-05, "loss": 0.3078, "step": 6027 }, { "epoch": 20.786206896551725, "grad_norm": 0.6615747213363647, "learning_rate": 2.492689655172414e-05, "loss": 0.2678, "step": 6028 }, { "epoch": 20.78965517241379, "grad_norm": 1.1731269359588623, "learning_rate": 2.493103448275862e-05, "loss": 0.2753, "step": 6029 }, { "epoch": 20.79310344827586, "grad_norm": 1.1009879112243652, "learning_rate": 2.4935172413793106e-05, "loss": 0.3041, "step": 6030 }, { "epoch": 20.79655172413793, "grad_norm": 1.4415253400802612, "learning_rate": 2.4939310344827588e-05, "loss": 0.2474, "step": 6031 }, { "epoch": 20.8, "grad_norm": 0.7679911255836487, "learning_rate": 2.494344827586207e-05, "loss": 0.2737, "step": 6032 }, { "epoch": 20.80344827586207, "grad_norm": 0.9737663269042969, "learning_rate": 2.4947586206896552e-05, "loss": 0.2615, "step": 6033 }, { "epoch": 20.806896551724137, "grad_norm": 0.9640787243843079, "learning_rate": 2.4951724137931034e-05, "loss": 0.2309, "step": 6034 }, { "epoch": 20.810344827586206, "grad_norm": 0.8331074118614197, "learning_rate": 2.495586206896552e-05, "loss": 0.2416, "step": 6035 }, { "epoch": 20.813793103448276, "grad_norm": 0.6937297582626343, "learning_rate": 2.4959999999999998e-05, "loss": 0.2523, "step": 6036 }, { "epoch": 20.817241379310346, "grad_norm": 0.8646946549415588, "learning_rate": 2.4964137931034484e-05, "loss": 0.2489, "step": 6037 }, { "epoch": 20.820689655172412, "grad_norm": 0.9855314493179321, "learning_rate": 2.4968275862068966e-05, "loss": 0.2518, "step": 6038 }, { "epoch": 20.824137931034482, "grad_norm": 0.9649326205253601, "learning_rate": 2.497241379310345e-05, "loss": 0.2839, "step": 6039 }, { "epoch": 20.82758620689655, "grad_norm": 1.013390064239502, "learning_rate": 2.497655172413793e-05, "loss": 0.2991, "step": 6040 }, { "epoch": 20.83103448275862, "grad_norm": 1.189270257949829, "learning_rate": 2.4980689655172415e-05, "loss": 0.289, "step": 6041 }, { "epoch": 20.83448275862069, "grad_norm": 0.9808408617973328, "learning_rate": 2.4984827586206897e-05, "loss": 0.2447, "step": 6042 }, { "epoch": 20.837931034482757, "grad_norm": 0.7882118225097656, "learning_rate": 2.4988965517241382e-05, "loss": 0.2613, "step": 6043 }, { "epoch": 20.841379310344827, "grad_norm": 1.1357070207595825, "learning_rate": 2.499310344827586e-05, "loss": 0.2826, "step": 6044 }, { "epoch": 20.844827586206897, "grad_norm": 0.6883044838905334, "learning_rate": 2.4997241379310347e-05, "loss": 0.2467, "step": 6045 }, { "epoch": 20.848275862068967, "grad_norm": 0.9113451838493347, "learning_rate": 2.500137931034483e-05, "loss": 0.2683, "step": 6046 }, { "epoch": 20.851724137931036, "grad_norm": 2.764404773712158, "learning_rate": 2.500551724137931e-05, "loss": 0.2652, "step": 6047 }, { "epoch": 20.855172413793102, "grad_norm": 5.008090972900391, "learning_rate": 2.5009655172413793e-05, "loss": 0.2978, "step": 6048 }, { "epoch": 20.858620689655172, "grad_norm": 1.2572609186172485, "learning_rate": 2.5013793103448275e-05, "loss": 0.2773, "step": 6049 }, { "epoch": 20.862068965517242, "grad_norm": 1.4112602472305298, "learning_rate": 2.501793103448276e-05, "loss": 0.3684, "step": 6050 }, { "epoch": 20.86551724137931, "grad_norm": 0.6400282979011536, "learning_rate": 2.5022068965517242e-05, "loss": 0.3555, "step": 6051 }, { "epoch": 20.868965517241378, "grad_norm": 0.9014937281608582, "learning_rate": 2.5026206896551724e-05, "loss": 0.2822, "step": 6052 }, { "epoch": 20.872413793103448, "grad_norm": 0.6823381781578064, "learning_rate": 2.5030344827586206e-05, "loss": 0.2908, "step": 6053 }, { "epoch": 20.875862068965517, "grad_norm": 0.88852459192276, "learning_rate": 2.503448275862069e-05, "loss": 0.3217, "step": 6054 }, { "epoch": 20.879310344827587, "grad_norm": 1.0471395254135132, "learning_rate": 2.5038620689655173e-05, "loss": 0.264, "step": 6055 }, { "epoch": 20.882758620689657, "grad_norm": 0.9276079535484314, "learning_rate": 2.5042758620689655e-05, "loss": 0.2867, "step": 6056 }, { "epoch": 20.886206896551723, "grad_norm": 0.8232904672622681, "learning_rate": 2.5046896551724138e-05, "loss": 0.2746, "step": 6057 }, { "epoch": 20.889655172413793, "grad_norm": 0.9306579828262329, "learning_rate": 2.5051034482758623e-05, "loss": 0.2779, "step": 6058 }, { "epoch": 20.893103448275863, "grad_norm": 0.8261183500289917, "learning_rate": 2.5055172413793105e-05, "loss": 0.275, "step": 6059 }, { "epoch": 20.896551724137932, "grad_norm": 1.0244858264923096, "learning_rate": 2.5059310344827587e-05, "loss": 0.2773, "step": 6060 }, { "epoch": 20.9, "grad_norm": 0.7661265730857849, "learning_rate": 2.506344827586207e-05, "loss": 0.2357, "step": 6061 }, { "epoch": 20.90344827586207, "grad_norm": 1.553234577178955, "learning_rate": 2.506758620689655e-05, "loss": 0.2787, "step": 6062 }, { "epoch": 20.906896551724138, "grad_norm": 0.9049566984176636, "learning_rate": 2.5071724137931036e-05, "loss": 0.2672, "step": 6063 }, { "epoch": 20.910344827586208, "grad_norm": 0.6650737524032593, "learning_rate": 2.507586206896552e-05, "loss": 0.273, "step": 6064 }, { "epoch": 20.913793103448278, "grad_norm": 1.0427517890930176, "learning_rate": 2.508e-05, "loss": 0.3152, "step": 6065 }, { "epoch": 20.917241379310344, "grad_norm": 1.515357255935669, "learning_rate": 2.5084137931034482e-05, "loss": 0.248, "step": 6066 }, { "epoch": 20.920689655172414, "grad_norm": 0.9039967656135559, "learning_rate": 2.5088275862068968e-05, "loss": 0.2842, "step": 6067 }, { "epoch": 20.924137931034483, "grad_norm": 0.9660398364067078, "learning_rate": 2.509241379310345e-05, "loss": 0.2534, "step": 6068 }, { "epoch": 20.927586206896553, "grad_norm": 0.7690753936767578, "learning_rate": 2.5096551724137932e-05, "loss": 0.275, "step": 6069 }, { "epoch": 20.93103448275862, "grad_norm": 1.0411933660507202, "learning_rate": 2.5100689655172414e-05, "loss": 0.2693, "step": 6070 }, { "epoch": 20.93448275862069, "grad_norm": 0.8890439867973328, "learning_rate": 2.51048275862069e-05, "loss": 0.2862, "step": 6071 }, { "epoch": 20.93793103448276, "grad_norm": 0.9494467973709106, "learning_rate": 2.510896551724138e-05, "loss": 0.2581, "step": 6072 }, { "epoch": 20.94137931034483, "grad_norm": 4.666723251342773, "learning_rate": 2.511310344827586e-05, "loss": 0.2695, "step": 6073 }, { "epoch": 20.944827586206898, "grad_norm": 1.8787544965744019, "learning_rate": 2.5117241379310345e-05, "loss": 0.2907, "step": 6074 }, { "epoch": 20.948275862068964, "grad_norm": 1.6689021587371826, "learning_rate": 2.5121379310344827e-05, "loss": 0.3417, "step": 6075 }, { "epoch": 20.951724137931034, "grad_norm": 2.1866793632507324, "learning_rate": 2.5125517241379313e-05, "loss": 0.3144, "step": 6076 }, { "epoch": 20.955172413793104, "grad_norm": 0.8152338862419128, "learning_rate": 2.512965517241379e-05, "loss": 0.2862, "step": 6077 }, { "epoch": 20.958620689655174, "grad_norm": 0.7015178799629211, "learning_rate": 2.5133793103448277e-05, "loss": 0.2763, "step": 6078 }, { "epoch": 20.96206896551724, "grad_norm": 0.6348142623901367, "learning_rate": 2.513793103448276e-05, "loss": 0.2805, "step": 6079 }, { "epoch": 20.96551724137931, "grad_norm": 0.9897341728210449, "learning_rate": 2.5142068965517244e-05, "loss": 0.3016, "step": 6080 }, { "epoch": 20.96896551724138, "grad_norm": 0.887215793132782, "learning_rate": 2.5146206896551723e-05, "loss": 0.2843, "step": 6081 }, { "epoch": 20.97241379310345, "grad_norm": 0.9647695422172546, "learning_rate": 2.5150344827586208e-05, "loss": 0.2769, "step": 6082 }, { "epoch": 20.97586206896552, "grad_norm": 0.7806251049041748, "learning_rate": 2.515448275862069e-05, "loss": 0.2547, "step": 6083 }, { "epoch": 20.979310344827585, "grad_norm": 1.1621158123016357, "learning_rate": 2.5158620689655176e-05, "loss": 0.2727, "step": 6084 }, { "epoch": 20.982758620689655, "grad_norm": 0.7824996113777161, "learning_rate": 2.5162758620689654e-05, "loss": 0.2549, "step": 6085 }, { "epoch": 20.986206896551725, "grad_norm": 0.9368327856063843, "learning_rate": 2.5166896551724136e-05, "loss": 0.2678, "step": 6086 }, { "epoch": 20.989655172413794, "grad_norm": 1.2468675374984741, "learning_rate": 2.5171034482758622e-05, "loss": 0.2802, "step": 6087 }, { "epoch": 20.99310344827586, "grad_norm": 1.3206440210342407, "learning_rate": 2.5175172413793104e-05, "loss": 0.2877, "step": 6088 }, { "epoch": 20.99655172413793, "grad_norm": 1.3962833881378174, "learning_rate": 2.5179310344827586e-05, "loss": 0.2963, "step": 6089 }, { "epoch": 21.0, "grad_norm": 1.2319051027297974, "learning_rate": 2.5183448275862068e-05, "loss": 0.3131, "step": 6090 }, { "epoch": 21.00344827586207, "grad_norm": 0.6115660071372986, "learning_rate": 2.5187586206896553e-05, "loss": 0.3245, "step": 6091 }, { "epoch": 21.00689655172414, "grad_norm": 0.9538983702659607, "learning_rate": 2.5191724137931035e-05, "loss": 0.2853, "step": 6092 }, { "epoch": 21.010344827586206, "grad_norm": 0.7029519081115723, "learning_rate": 2.519586206896552e-05, "loss": 0.3109, "step": 6093 }, { "epoch": 21.013793103448275, "grad_norm": 0.7733204364776611, "learning_rate": 2.52e-05, "loss": 0.2261, "step": 6094 }, { "epoch": 21.017241379310345, "grad_norm": 0.7470688223838806, "learning_rate": 2.5204137931034485e-05, "loss": 0.2657, "step": 6095 }, { "epoch": 21.020689655172415, "grad_norm": 0.5844210386276245, "learning_rate": 2.5208275862068967e-05, "loss": 0.2517, "step": 6096 }, { "epoch": 21.02413793103448, "grad_norm": 0.7937100529670715, "learning_rate": 2.521241379310345e-05, "loss": 0.2425, "step": 6097 }, { "epoch": 21.02758620689655, "grad_norm": 1.2548034191131592, "learning_rate": 2.521655172413793e-05, "loss": 0.2551, "step": 6098 }, { "epoch": 21.03103448275862, "grad_norm": 0.981842577457428, "learning_rate": 2.5220689655172413e-05, "loss": 0.2315, "step": 6099 }, { "epoch": 21.03448275862069, "grad_norm": 0.589444100856781, "learning_rate": 2.5224827586206898e-05, "loss": 0.2251, "step": 6100 }, { "epoch": 21.03793103448276, "grad_norm": 1.3385952711105347, "learning_rate": 2.522896551724138e-05, "loss": 0.294, "step": 6101 }, { "epoch": 21.041379310344826, "grad_norm": 0.7436345219612122, "learning_rate": 2.5233103448275862e-05, "loss": 0.2607, "step": 6102 }, { "epoch": 21.044827586206896, "grad_norm": 0.9510999917984009, "learning_rate": 2.5237241379310344e-05, "loss": 0.2683, "step": 6103 }, { "epoch": 21.048275862068966, "grad_norm": 0.7933831214904785, "learning_rate": 2.524137931034483e-05, "loss": 0.253, "step": 6104 }, { "epoch": 21.051724137931036, "grad_norm": 0.6621162295341492, "learning_rate": 2.524551724137931e-05, "loss": 0.2602, "step": 6105 }, { "epoch": 21.055172413793102, "grad_norm": 0.57001131772995, "learning_rate": 2.5249655172413794e-05, "loss": 0.2585, "step": 6106 }, { "epoch": 21.05862068965517, "grad_norm": 0.9051593542098999, "learning_rate": 2.5253793103448276e-05, "loss": 0.2662, "step": 6107 }, { "epoch": 21.06206896551724, "grad_norm": 1.0814834833145142, "learning_rate": 2.525793103448276e-05, "loss": 0.2139, "step": 6108 }, { "epoch": 21.06551724137931, "grad_norm": 1.2903869152069092, "learning_rate": 2.5262068965517243e-05, "loss": 0.2596, "step": 6109 }, { "epoch": 21.06896551724138, "grad_norm": 1.051862120628357, "learning_rate": 2.5266206896551725e-05, "loss": 0.258, "step": 6110 }, { "epoch": 21.072413793103447, "grad_norm": 1.0301316976547241, "learning_rate": 2.5270344827586207e-05, "loss": 0.253, "step": 6111 }, { "epoch": 21.075862068965517, "grad_norm": 0.9505766034126282, "learning_rate": 2.527448275862069e-05, "loss": 0.241, "step": 6112 }, { "epoch": 21.079310344827586, "grad_norm": 2.11545991897583, "learning_rate": 2.5278620689655175e-05, "loss": 0.2894, "step": 6113 }, { "epoch": 21.082758620689656, "grad_norm": 1.1223788261413574, "learning_rate": 2.5282758620689653e-05, "loss": 0.2389, "step": 6114 }, { "epoch": 21.086206896551722, "grad_norm": 2.2323179244995117, "learning_rate": 2.528689655172414e-05, "loss": 0.3638, "step": 6115 }, { "epoch": 21.089655172413792, "grad_norm": 0.7572107911109924, "learning_rate": 2.529103448275862e-05, "loss": 0.3525, "step": 6116 }, { "epoch": 21.093103448275862, "grad_norm": 0.5289387106895447, "learning_rate": 2.5295172413793106e-05, "loss": 0.2594, "step": 6117 }, { "epoch": 21.09655172413793, "grad_norm": 0.5190026760101318, "learning_rate": 2.5299310344827585e-05, "loss": 0.2585, "step": 6118 }, { "epoch": 21.1, "grad_norm": 0.7391384243965149, "learning_rate": 2.530344827586207e-05, "loss": 0.3, "step": 6119 }, { "epoch": 21.103448275862068, "grad_norm": 0.7788286805152893, "learning_rate": 2.5307586206896552e-05, "loss": 0.2822, "step": 6120 }, { "epoch": 21.106896551724137, "grad_norm": 0.6070098876953125, "learning_rate": 2.5311724137931037e-05, "loss": 0.2367, "step": 6121 }, { "epoch": 21.110344827586207, "grad_norm": 0.8013535737991333, "learning_rate": 2.5315862068965516e-05, "loss": 0.2434, "step": 6122 }, { "epoch": 21.113793103448277, "grad_norm": 1.287428855895996, "learning_rate": 2.5319999999999998e-05, "loss": 0.2614, "step": 6123 }, { "epoch": 21.117241379310343, "grad_norm": 0.607435405254364, "learning_rate": 2.5324137931034484e-05, "loss": 0.2572, "step": 6124 }, { "epoch": 21.120689655172413, "grad_norm": 0.7670021057128906, "learning_rate": 2.5328275862068966e-05, "loss": 0.261, "step": 6125 }, { "epoch": 21.124137931034483, "grad_norm": 0.9166030287742615, "learning_rate": 2.533241379310345e-05, "loss": 0.2425, "step": 6126 }, { "epoch": 21.127586206896552, "grad_norm": 1.1156569719314575, "learning_rate": 2.533655172413793e-05, "loss": 0.2779, "step": 6127 }, { "epoch": 21.131034482758622, "grad_norm": 0.8271479606628418, "learning_rate": 2.5340689655172415e-05, "loss": 0.2658, "step": 6128 }, { "epoch": 21.13448275862069, "grad_norm": 1.033759593963623, "learning_rate": 2.5344827586206897e-05, "loss": 0.2449, "step": 6129 }, { "epoch": 21.137931034482758, "grad_norm": 1.2542314529418945, "learning_rate": 2.5348965517241382e-05, "loss": 0.2433, "step": 6130 }, { "epoch": 21.141379310344828, "grad_norm": 2.178727149963379, "learning_rate": 2.535310344827586e-05, "loss": 0.2273, "step": 6131 }, { "epoch": 21.144827586206898, "grad_norm": 0.7333078980445862, "learning_rate": 2.5357241379310346e-05, "loss": 0.2352, "step": 6132 }, { "epoch": 21.148275862068967, "grad_norm": 1.0938440561294556, "learning_rate": 2.536137931034483e-05, "loss": 0.2516, "step": 6133 }, { "epoch": 21.151724137931033, "grad_norm": 1.0129015445709229, "learning_rate": 2.5365517241379314e-05, "loss": 0.2772, "step": 6134 }, { "epoch": 21.155172413793103, "grad_norm": 0.9976531863212585, "learning_rate": 2.5369655172413793e-05, "loss": 0.2487, "step": 6135 }, { "epoch": 21.158620689655173, "grad_norm": 0.9400806427001953, "learning_rate": 2.5373793103448275e-05, "loss": 0.2725, "step": 6136 }, { "epoch": 21.162068965517243, "grad_norm": 0.992837131023407, "learning_rate": 2.537793103448276e-05, "loss": 0.2328, "step": 6137 }, { "epoch": 21.16551724137931, "grad_norm": 1.1345460414886475, "learning_rate": 2.5382068965517242e-05, "loss": 0.2708, "step": 6138 }, { "epoch": 21.16896551724138, "grad_norm": 1.4353195428848267, "learning_rate": 2.5386206896551724e-05, "loss": 0.3216, "step": 6139 }, { "epoch": 21.17241379310345, "grad_norm": 1.9720996618270874, "learning_rate": 2.5390344827586206e-05, "loss": 0.3444, "step": 6140 }, { "epoch": 21.175862068965518, "grad_norm": 0.6949540972709656, "learning_rate": 2.539448275862069e-05, "loss": 0.297, "step": 6141 }, { "epoch": 21.179310344827588, "grad_norm": 0.7343783378601074, "learning_rate": 2.5398620689655173e-05, "loss": 0.2985, "step": 6142 }, { "epoch": 21.182758620689654, "grad_norm": 1.0290710926055908, "learning_rate": 2.5402758620689655e-05, "loss": 0.3204, "step": 6143 }, { "epoch": 21.186206896551724, "grad_norm": 1.1461796760559082, "learning_rate": 2.5406896551724137e-05, "loss": 0.2937, "step": 6144 }, { "epoch": 21.189655172413794, "grad_norm": 0.8144137263298035, "learning_rate": 2.5411034482758623e-05, "loss": 0.2805, "step": 6145 }, { "epoch": 21.193103448275863, "grad_norm": 0.631061315536499, "learning_rate": 2.5415172413793105e-05, "loss": 0.2531, "step": 6146 }, { "epoch": 21.19655172413793, "grad_norm": 0.7341410517692566, "learning_rate": 2.5419310344827587e-05, "loss": 0.2721, "step": 6147 }, { "epoch": 21.2, "grad_norm": 0.860614001750946, "learning_rate": 2.542344827586207e-05, "loss": 0.2855, "step": 6148 }, { "epoch": 21.20344827586207, "grad_norm": 0.9044955968856812, "learning_rate": 2.542758620689655e-05, "loss": 0.2592, "step": 6149 }, { "epoch": 21.20689655172414, "grad_norm": 0.9192856550216675, "learning_rate": 2.5431724137931036e-05, "loss": 0.2771, "step": 6150 }, { "epoch": 21.21034482758621, "grad_norm": 0.7744680643081665, "learning_rate": 2.5435862068965515e-05, "loss": 0.2438, "step": 6151 }, { "epoch": 21.213793103448275, "grad_norm": 0.9349953532218933, "learning_rate": 2.544e-05, "loss": 0.2808, "step": 6152 }, { "epoch": 21.217241379310344, "grad_norm": 0.6233842968940735, "learning_rate": 2.5444137931034482e-05, "loss": 0.266, "step": 6153 }, { "epoch": 21.220689655172414, "grad_norm": 1.8996765613555908, "learning_rate": 2.5448275862068968e-05, "loss": 0.2471, "step": 6154 }, { "epoch": 21.224137931034484, "grad_norm": 1.9284424781799316, "learning_rate": 2.5452413793103446e-05, "loss": 0.2757, "step": 6155 }, { "epoch": 21.22758620689655, "grad_norm": 0.941113293170929, "learning_rate": 2.5456551724137932e-05, "loss": 0.2493, "step": 6156 }, { "epoch": 21.23103448275862, "grad_norm": 0.7186685800552368, "learning_rate": 2.5460689655172414e-05, "loss": 0.2589, "step": 6157 }, { "epoch": 21.23448275862069, "grad_norm": 0.7235294580459595, "learning_rate": 2.54648275862069e-05, "loss": 0.253, "step": 6158 }, { "epoch": 21.23793103448276, "grad_norm": 1.9241795539855957, "learning_rate": 2.546896551724138e-05, "loss": 0.265, "step": 6159 }, { "epoch": 21.24137931034483, "grad_norm": 0.7917109131813049, "learning_rate": 2.5473103448275863e-05, "loss": 0.2338, "step": 6160 }, { "epoch": 21.244827586206895, "grad_norm": 0.9235842823982239, "learning_rate": 2.5477241379310345e-05, "loss": 0.2744, "step": 6161 }, { "epoch": 21.248275862068965, "grad_norm": 1.1259469985961914, "learning_rate": 2.5481379310344827e-05, "loss": 0.2478, "step": 6162 }, { "epoch": 21.251724137931035, "grad_norm": 1.2782684564590454, "learning_rate": 2.5485517241379313e-05, "loss": 0.2372, "step": 6163 }, { "epoch": 21.255172413793105, "grad_norm": 1.0473932027816772, "learning_rate": 2.548965517241379e-05, "loss": 0.2779, "step": 6164 }, { "epoch": 21.25862068965517, "grad_norm": 2.56831955909729, "learning_rate": 2.5493793103448277e-05, "loss": 0.4221, "step": 6165 }, { "epoch": 21.26206896551724, "grad_norm": 0.6851054430007935, "learning_rate": 2.549793103448276e-05, "loss": 0.2988, "step": 6166 }, { "epoch": 21.26551724137931, "grad_norm": 1.5439311265945435, "learning_rate": 2.5502068965517244e-05, "loss": 0.2968, "step": 6167 }, { "epoch": 21.26896551724138, "grad_norm": 0.9313060641288757, "learning_rate": 2.5506206896551723e-05, "loss": 0.3105, "step": 6168 }, { "epoch": 21.27241379310345, "grad_norm": 1.4110502004623413, "learning_rate": 2.5510344827586208e-05, "loss": 0.2569, "step": 6169 }, { "epoch": 21.275862068965516, "grad_norm": 0.8005468845367432, "learning_rate": 2.551448275862069e-05, "loss": 0.2904, "step": 6170 }, { "epoch": 21.279310344827586, "grad_norm": 0.7257872223854065, "learning_rate": 2.5518620689655176e-05, "loss": 0.2672, "step": 6171 }, { "epoch": 21.282758620689656, "grad_norm": 0.6072506308555603, "learning_rate": 2.5522758620689654e-05, "loss": 0.2574, "step": 6172 }, { "epoch": 21.286206896551725, "grad_norm": 0.8142940998077393, "learning_rate": 2.552689655172414e-05, "loss": 0.2478, "step": 6173 }, { "epoch": 21.28965517241379, "grad_norm": 0.6908814311027527, "learning_rate": 2.5531034482758622e-05, "loss": 0.2402, "step": 6174 }, { "epoch": 21.29310344827586, "grad_norm": 0.6658610701560974, "learning_rate": 2.5535172413793104e-05, "loss": 0.234, "step": 6175 }, { "epoch": 21.29655172413793, "grad_norm": 0.9332767128944397, "learning_rate": 2.5539310344827586e-05, "loss": 0.2697, "step": 6176 }, { "epoch": 21.3, "grad_norm": 1.1476757526397705, "learning_rate": 2.5543448275862068e-05, "loss": 0.2512, "step": 6177 }, { "epoch": 21.30344827586207, "grad_norm": 0.7275797128677368, "learning_rate": 2.5547586206896553e-05, "loss": 0.2709, "step": 6178 }, { "epoch": 21.306896551724137, "grad_norm": 1.04140043258667, "learning_rate": 2.5551724137931035e-05, "loss": 0.2829, "step": 6179 }, { "epoch": 21.310344827586206, "grad_norm": 1.7180132865905762, "learning_rate": 2.5555862068965517e-05, "loss": 0.276, "step": 6180 }, { "epoch": 21.313793103448276, "grad_norm": 0.8315867185592651, "learning_rate": 2.556e-05, "loss": 0.2638, "step": 6181 }, { "epoch": 21.317241379310346, "grad_norm": 0.797853410243988, "learning_rate": 2.5564137931034485e-05, "loss": 0.2532, "step": 6182 }, { "epoch": 21.320689655172412, "grad_norm": 1.4228507280349731, "learning_rate": 2.5568275862068967e-05, "loss": 0.2567, "step": 6183 }, { "epoch": 21.324137931034482, "grad_norm": 0.8316570520401001, "learning_rate": 2.557241379310345e-05, "loss": 0.2649, "step": 6184 }, { "epoch": 21.32758620689655, "grad_norm": 1.3734372854232788, "learning_rate": 2.557655172413793e-05, "loss": 0.227, "step": 6185 }, { "epoch": 21.33103448275862, "grad_norm": 0.6349250674247742, "learning_rate": 2.5580689655172416e-05, "loss": 0.217, "step": 6186 }, { "epoch": 21.33448275862069, "grad_norm": 0.8997179865837097, "learning_rate": 2.5584827586206898e-05, "loss": 0.267, "step": 6187 }, { "epoch": 21.337931034482757, "grad_norm": 1.2974745035171509, "learning_rate": 2.5588965517241377e-05, "loss": 0.2633, "step": 6188 }, { "epoch": 21.341379310344827, "grad_norm": 1.35606050491333, "learning_rate": 2.5593103448275862e-05, "loss": 0.2985, "step": 6189 }, { "epoch": 21.344827586206897, "grad_norm": 1.4044349193572998, "learning_rate": 2.5597241379310344e-05, "loss": 0.3298, "step": 6190 }, { "epoch": 21.348275862068967, "grad_norm": 0.5977023243904114, "learning_rate": 2.560137931034483e-05, "loss": 0.3253, "step": 6191 }, { "epoch": 21.351724137931033, "grad_norm": 0.6604309678077698, "learning_rate": 2.560551724137931e-05, "loss": 0.3078, "step": 6192 }, { "epoch": 21.355172413793102, "grad_norm": 0.9467971324920654, "learning_rate": 2.5609655172413794e-05, "loss": 0.2729, "step": 6193 }, { "epoch": 21.358620689655172, "grad_norm": 1.0446709394454956, "learning_rate": 2.5613793103448276e-05, "loss": 0.256, "step": 6194 }, { "epoch": 21.362068965517242, "grad_norm": 0.8063238859176636, "learning_rate": 2.561793103448276e-05, "loss": 0.2979, "step": 6195 }, { "epoch": 21.36551724137931, "grad_norm": 0.7393348217010498, "learning_rate": 2.5622068965517243e-05, "loss": 0.2725, "step": 6196 }, { "epoch": 21.368965517241378, "grad_norm": 0.6957042217254639, "learning_rate": 2.5626206896551725e-05, "loss": 0.2718, "step": 6197 }, { "epoch": 21.372413793103448, "grad_norm": 1.0841844081878662, "learning_rate": 2.5630344827586207e-05, "loss": 0.3017, "step": 6198 }, { "epoch": 21.375862068965517, "grad_norm": 0.6907911896705627, "learning_rate": 2.5634482758620692e-05, "loss": 0.2526, "step": 6199 }, { "epoch": 21.379310344827587, "grad_norm": 0.7074601650238037, "learning_rate": 2.5638620689655175e-05, "loss": 0.2647, "step": 6200 }, { "epoch": 21.382758620689657, "grad_norm": 0.6110197901725769, "learning_rate": 2.5642758620689653e-05, "loss": 0.2266, "step": 6201 }, { "epoch": 21.386206896551723, "grad_norm": 0.6931840777397156, "learning_rate": 2.564689655172414e-05, "loss": 0.2198, "step": 6202 }, { "epoch": 21.389655172413793, "grad_norm": 1.0900390148162842, "learning_rate": 2.565103448275862e-05, "loss": 0.2698, "step": 6203 }, { "epoch": 21.393103448275863, "grad_norm": 1.0985448360443115, "learning_rate": 2.5655172413793106e-05, "loss": 0.2467, "step": 6204 }, { "epoch": 21.396551724137932, "grad_norm": 0.7518427968025208, "learning_rate": 2.5659310344827585e-05, "loss": 0.2905, "step": 6205 }, { "epoch": 21.4, "grad_norm": 0.8248470425605774, "learning_rate": 2.566344827586207e-05, "loss": 0.2216, "step": 6206 }, { "epoch": 21.40344827586207, "grad_norm": 1.4533216953277588, "learning_rate": 2.5667586206896552e-05, "loss": 0.2569, "step": 6207 }, { "epoch": 21.406896551724138, "grad_norm": 0.8342261910438538, "learning_rate": 2.5671724137931037e-05, "loss": 0.2487, "step": 6208 }, { "epoch": 21.410344827586208, "grad_norm": 1.0956872701644897, "learning_rate": 2.5675862068965516e-05, "loss": 0.2663, "step": 6209 }, { "epoch": 21.413793103448278, "grad_norm": 1.5717108249664307, "learning_rate": 2.568e-05, "loss": 0.2554, "step": 6210 }, { "epoch": 21.417241379310344, "grad_norm": 1.4137861728668213, "learning_rate": 2.5684137931034483e-05, "loss": 0.2526, "step": 6211 }, { "epoch": 21.420689655172414, "grad_norm": 6.740298271179199, "learning_rate": 2.568827586206897e-05, "loss": 0.2294, "step": 6212 }, { "epoch": 21.424137931034483, "grad_norm": 1.0960007905960083, "learning_rate": 2.5692413793103448e-05, "loss": 0.2606, "step": 6213 }, { "epoch": 21.427586206896553, "grad_norm": 0.958663284778595, "learning_rate": 2.569655172413793e-05, "loss": 0.2639, "step": 6214 }, { "epoch": 21.43103448275862, "grad_norm": 1.668923258781433, "learning_rate": 2.5700689655172415e-05, "loss": 0.3569, "step": 6215 }, { "epoch": 21.43448275862069, "grad_norm": 0.887170672416687, "learning_rate": 2.5704827586206897e-05, "loss": 0.3633, "step": 6216 }, { "epoch": 21.43793103448276, "grad_norm": 0.6295938491821289, "learning_rate": 2.570896551724138e-05, "loss": 0.2725, "step": 6217 }, { "epoch": 21.44137931034483, "grad_norm": 0.6278718113899231, "learning_rate": 2.571310344827586e-05, "loss": 0.2952, "step": 6218 }, { "epoch": 21.444827586206898, "grad_norm": 1.0322951078414917, "learning_rate": 2.5717241379310346e-05, "loss": 0.2969, "step": 6219 }, { "epoch": 21.448275862068964, "grad_norm": 1.015030026435852, "learning_rate": 2.572137931034483e-05, "loss": 0.2416, "step": 6220 }, { "epoch": 21.451724137931034, "grad_norm": 0.8743013739585876, "learning_rate": 2.572551724137931e-05, "loss": 0.2934, "step": 6221 }, { "epoch": 21.455172413793104, "grad_norm": 1.4117817878723145, "learning_rate": 2.5729655172413792e-05, "loss": 0.3263, "step": 6222 }, { "epoch": 21.458620689655174, "grad_norm": 1.0406032800674438, "learning_rate": 2.5733793103448278e-05, "loss": 0.2687, "step": 6223 }, { "epoch": 21.46206896551724, "grad_norm": 1.074947714805603, "learning_rate": 2.573793103448276e-05, "loss": 0.2468, "step": 6224 }, { "epoch": 21.46551724137931, "grad_norm": 0.8576462268829346, "learning_rate": 2.5742068965517245e-05, "loss": 0.2398, "step": 6225 }, { "epoch": 21.46896551724138, "grad_norm": 1.2200676202774048, "learning_rate": 2.5746206896551724e-05, "loss": 0.2306, "step": 6226 }, { "epoch": 21.47241379310345, "grad_norm": 0.9318497180938721, "learning_rate": 2.5750344827586206e-05, "loss": 0.249, "step": 6227 }, { "epoch": 21.47586206896552, "grad_norm": 0.6166543960571289, "learning_rate": 2.575448275862069e-05, "loss": 0.2397, "step": 6228 }, { "epoch": 21.479310344827585, "grad_norm": 1.1099272966384888, "learning_rate": 2.5758620689655173e-05, "loss": 0.2506, "step": 6229 }, { "epoch": 21.482758620689655, "grad_norm": 1.0104843378067017, "learning_rate": 2.5762758620689655e-05, "loss": 0.2558, "step": 6230 }, { "epoch": 21.486206896551725, "grad_norm": 1.1032483577728271, "learning_rate": 2.5766896551724137e-05, "loss": 0.2837, "step": 6231 }, { "epoch": 21.489655172413794, "grad_norm": 1.1207460165023804, "learning_rate": 2.5771034482758623e-05, "loss": 0.2528, "step": 6232 }, { "epoch": 21.49310344827586, "grad_norm": 1.2595038414001465, "learning_rate": 2.5775172413793105e-05, "loss": 0.2562, "step": 6233 }, { "epoch": 21.49655172413793, "grad_norm": 0.7573560476303101, "learning_rate": 2.5779310344827587e-05, "loss": 0.2531, "step": 6234 }, { "epoch": 21.5, "grad_norm": 1.5214595794677734, "learning_rate": 2.578344827586207e-05, "loss": 0.2464, "step": 6235 }, { "epoch": 21.50344827586207, "grad_norm": 0.6965741515159607, "learning_rate": 2.5787586206896554e-05, "loss": 0.2736, "step": 6236 }, { "epoch": 21.50689655172414, "grad_norm": 1.0370601415634155, "learning_rate": 2.5791724137931036e-05, "loss": 0.2323, "step": 6237 }, { "epoch": 21.510344827586206, "grad_norm": 1.017190933227539, "learning_rate": 2.5795862068965518e-05, "loss": 0.2781, "step": 6238 }, { "epoch": 21.513793103448275, "grad_norm": 1.2083632946014404, "learning_rate": 2.58e-05, "loss": 0.3277, "step": 6239 }, { "epoch": 21.517241379310345, "grad_norm": 1.49679696559906, "learning_rate": 2.5804137931034482e-05, "loss": 0.3979, "step": 6240 }, { "epoch": 21.520689655172415, "grad_norm": 1.069202184677124, "learning_rate": 2.5808275862068968e-05, "loss": 0.3126, "step": 6241 }, { "epoch": 21.52413793103448, "grad_norm": 0.9541476368904114, "learning_rate": 2.5812413793103446e-05, "loss": 0.272, "step": 6242 }, { "epoch": 21.52758620689655, "grad_norm": 0.7339330911636353, "learning_rate": 2.5816551724137932e-05, "loss": 0.3118, "step": 6243 }, { "epoch": 21.53103448275862, "grad_norm": 0.8024728298187256, "learning_rate": 2.5820689655172414e-05, "loss": 0.3149, "step": 6244 }, { "epoch": 21.53448275862069, "grad_norm": 0.9846231341362, "learning_rate": 2.58248275862069e-05, "loss": 0.2763, "step": 6245 }, { "epoch": 21.53793103448276, "grad_norm": 1.1305077075958252, "learning_rate": 2.5828965517241378e-05, "loss": 0.2696, "step": 6246 }, { "epoch": 21.541379310344826, "grad_norm": 1.4371579885482788, "learning_rate": 2.5833103448275863e-05, "loss": 0.3015, "step": 6247 }, { "epoch": 21.544827586206896, "grad_norm": 0.8044078946113586, "learning_rate": 2.5837241379310345e-05, "loss": 0.2991, "step": 6248 }, { "epoch": 21.548275862068966, "grad_norm": 0.6465801000595093, "learning_rate": 2.584137931034483e-05, "loss": 0.2582, "step": 6249 }, { "epoch": 21.551724137931036, "grad_norm": 0.8785936236381531, "learning_rate": 2.584551724137931e-05, "loss": 0.257, "step": 6250 }, { "epoch": 21.555172413793102, "grad_norm": 0.7763660550117493, "learning_rate": 2.5849655172413795e-05, "loss": 0.251, "step": 6251 }, { "epoch": 21.55862068965517, "grad_norm": 0.754062831401825, "learning_rate": 2.5853793103448277e-05, "loss": 0.2428, "step": 6252 }, { "epoch": 21.56206896551724, "grad_norm": 1.1671830415725708, "learning_rate": 2.585793103448276e-05, "loss": 0.2731, "step": 6253 }, { "epoch": 21.56551724137931, "grad_norm": 0.6451064944267273, "learning_rate": 2.586206896551724e-05, "loss": 0.2652, "step": 6254 }, { "epoch": 21.56896551724138, "grad_norm": 1.0172914266586304, "learning_rate": 2.5866206896551723e-05, "loss": 0.2712, "step": 6255 }, { "epoch": 21.572413793103447, "grad_norm": 0.6894949674606323, "learning_rate": 2.5870344827586208e-05, "loss": 0.2497, "step": 6256 }, { "epoch": 21.575862068965517, "grad_norm": 0.742127001285553, "learning_rate": 2.587448275862069e-05, "loss": 0.2404, "step": 6257 }, { "epoch": 21.579310344827586, "grad_norm": 0.940788984298706, "learning_rate": 2.5878620689655176e-05, "loss": 0.2525, "step": 6258 }, { "epoch": 21.582758620689656, "grad_norm": 1.3667961359024048, "learning_rate": 2.5882758620689654e-05, "loss": 0.2442, "step": 6259 }, { "epoch": 21.586206896551722, "grad_norm": 2.1678972244262695, "learning_rate": 2.588689655172414e-05, "loss": 0.2473, "step": 6260 }, { "epoch": 21.589655172413792, "grad_norm": 1.1260243654251099, "learning_rate": 2.589103448275862e-05, "loss": 0.2609, "step": 6261 }, { "epoch": 21.593103448275862, "grad_norm": 1.1879299879074097, "learning_rate": 2.5895172413793107e-05, "loss": 0.2741, "step": 6262 }, { "epoch": 21.59655172413793, "grad_norm": 1.2933710813522339, "learning_rate": 2.5899310344827586e-05, "loss": 0.2512, "step": 6263 }, { "epoch": 21.6, "grad_norm": 1.2385305166244507, "learning_rate": 2.590344827586207e-05, "loss": 0.2912, "step": 6264 }, { "epoch": 21.603448275862068, "grad_norm": 2.5921742916107178, "learning_rate": 2.5907586206896553e-05, "loss": 0.3046, "step": 6265 }, { "epoch": 21.606896551724137, "grad_norm": 0.8011339902877808, "learning_rate": 2.5911724137931035e-05, "loss": 0.3167, "step": 6266 }, { "epoch": 21.610344827586207, "grad_norm": 0.6559858322143555, "learning_rate": 2.5915862068965517e-05, "loss": 0.2867, "step": 6267 }, { "epoch": 21.613793103448277, "grad_norm": 1.2371883392333984, "learning_rate": 2.592e-05, "loss": 0.2916, "step": 6268 }, { "epoch": 21.617241379310343, "grad_norm": 0.7299272418022156, "learning_rate": 2.5924137931034485e-05, "loss": 0.2396, "step": 6269 }, { "epoch": 21.620689655172413, "grad_norm": 1.1488338708877563, "learning_rate": 2.5928275862068967e-05, "loss": 0.2534, "step": 6270 }, { "epoch": 21.624137931034483, "grad_norm": 0.6646484732627869, "learning_rate": 2.593241379310345e-05, "loss": 0.2742, "step": 6271 }, { "epoch": 21.627586206896552, "grad_norm": 0.7063506245613098, "learning_rate": 2.593655172413793e-05, "loss": 0.2448, "step": 6272 }, { "epoch": 21.631034482758622, "grad_norm": 0.8149585723876953, "learning_rate": 2.5940689655172416e-05, "loss": 0.2675, "step": 6273 }, { "epoch": 21.63448275862069, "grad_norm": 0.7335019111633301, "learning_rate": 2.5944827586206898e-05, "loss": 0.2714, "step": 6274 }, { "epoch": 21.637931034482758, "grad_norm": 0.6950780749320984, "learning_rate": 2.594896551724138e-05, "loss": 0.2687, "step": 6275 }, { "epoch": 21.641379310344828, "grad_norm": 0.7007794380187988, "learning_rate": 2.5953103448275862e-05, "loss": 0.2744, "step": 6276 }, { "epoch": 21.644827586206898, "grad_norm": 0.5907039046287537, "learning_rate": 2.5957241379310347e-05, "loss": 0.2699, "step": 6277 }, { "epoch": 21.648275862068964, "grad_norm": 2.6630496978759766, "learning_rate": 2.596137931034483e-05, "loss": 0.233, "step": 6278 }, { "epoch": 21.651724137931033, "grad_norm": 1.044185757637024, "learning_rate": 2.5965517241379308e-05, "loss": 0.2635, "step": 6279 }, { "epoch": 21.655172413793103, "grad_norm": 0.7566593885421753, "learning_rate": 2.5969655172413794e-05, "loss": 0.2532, "step": 6280 }, { "epoch": 21.658620689655173, "grad_norm": 0.9864738583564758, "learning_rate": 2.5973793103448276e-05, "loss": 0.2662, "step": 6281 }, { "epoch": 21.662068965517243, "grad_norm": 1.022390365600586, "learning_rate": 2.597793103448276e-05, "loss": 0.2718, "step": 6282 }, { "epoch": 21.66551724137931, "grad_norm": 1.143666386604309, "learning_rate": 2.598206896551724e-05, "loss": 0.2461, "step": 6283 }, { "epoch": 21.66896551724138, "grad_norm": 0.8964836597442627, "learning_rate": 2.5986206896551725e-05, "loss": 0.2519, "step": 6284 }, { "epoch": 21.67241379310345, "grad_norm": 1.7233455181121826, "learning_rate": 2.5990344827586207e-05, "loss": 0.272, "step": 6285 }, { "epoch": 21.675862068965518, "grad_norm": 1.314663290977478, "learning_rate": 2.5994482758620692e-05, "loss": 0.2626, "step": 6286 }, { "epoch": 21.679310344827588, "grad_norm": 1.7494999170303345, "learning_rate": 2.599862068965517e-05, "loss": 0.2292, "step": 6287 }, { "epoch": 21.682758620689654, "grad_norm": 1.099787712097168, "learning_rate": 2.6002758620689656e-05, "loss": 0.2699, "step": 6288 }, { "epoch": 21.686206896551724, "grad_norm": 1.2414830923080444, "learning_rate": 2.600689655172414e-05, "loss": 0.2939, "step": 6289 }, { "epoch": 21.689655172413794, "grad_norm": 2.170959949493408, "learning_rate": 2.601103448275862e-05, "loss": 0.364, "step": 6290 }, { "epoch": 21.693103448275863, "grad_norm": 0.8928316235542297, "learning_rate": 2.6015172413793106e-05, "loss": 0.3146, "step": 6291 }, { "epoch": 21.69655172413793, "grad_norm": 0.6650879979133606, "learning_rate": 2.6019310344827585e-05, "loss": 0.2756, "step": 6292 }, { "epoch": 21.7, "grad_norm": 1.1365736722946167, "learning_rate": 2.602344827586207e-05, "loss": 0.2681, "step": 6293 }, { "epoch": 21.70344827586207, "grad_norm": 0.8422192931175232, "learning_rate": 2.6027586206896552e-05, "loss": 0.2857, "step": 6294 }, { "epoch": 21.70689655172414, "grad_norm": 0.7209210395812988, "learning_rate": 2.6031724137931037e-05, "loss": 0.2889, "step": 6295 }, { "epoch": 21.71034482758621, "grad_norm": 0.5594603419303894, "learning_rate": 2.6035862068965516e-05, "loss": 0.2673, "step": 6296 }, { "epoch": 21.713793103448275, "grad_norm": 0.596533477306366, "learning_rate": 2.604e-05, "loss": 0.2757, "step": 6297 }, { "epoch": 21.717241379310344, "grad_norm": 1.4844216108322144, "learning_rate": 2.6044137931034483e-05, "loss": 0.2748, "step": 6298 }, { "epoch": 21.720689655172414, "grad_norm": 0.5806491374969482, "learning_rate": 2.604827586206897e-05, "loss": 0.2903, "step": 6299 }, { "epoch": 21.724137931034484, "grad_norm": 1.084670066833496, "learning_rate": 2.6052413793103447e-05, "loss": 0.2605, "step": 6300 }, { "epoch": 21.72758620689655, "grad_norm": 0.7037180066108704, "learning_rate": 2.6056551724137933e-05, "loss": 0.2362, "step": 6301 }, { "epoch": 21.73103448275862, "grad_norm": 0.7203189730644226, "learning_rate": 2.6060689655172415e-05, "loss": 0.2807, "step": 6302 }, { "epoch": 21.73448275862069, "grad_norm": 1.1502571105957031, "learning_rate": 2.6064827586206897e-05, "loss": 0.2506, "step": 6303 }, { "epoch": 21.73793103448276, "grad_norm": 0.8874919414520264, "learning_rate": 2.606896551724138e-05, "loss": 0.247, "step": 6304 }, { "epoch": 21.74137931034483, "grad_norm": 1.5804411172866821, "learning_rate": 2.607310344827586e-05, "loss": 0.2566, "step": 6305 }, { "epoch": 21.744827586206895, "grad_norm": 1.1727261543273926, "learning_rate": 2.6077241379310346e-05, "loss": 0.2485, "step": 6306 }, { "epoch": 21.748275862068965, "grad_norm": 0.7528529167175293, "learning_rate": 2.608137931034483e-05, "loss": 0.2539, "step": 6307 }, { "epoch": 21.751724137931035, "grad_norm": 0.9893821477890015, "learning_rate": 2.608551724137931e-05, "loss": 0.2646, "step": 6308 }, { "epoch": 21.755172413793105, "grad_norm": 1.3662431240081787, "learning_rate": 2.6089655172413792e-05, "loss": 0.2464, "step": 6309 }, { "epoch": 21.75862068965517, "grad_norm": 0.8162212371826172, "learning_rate": 2.6093793103448278e-05, "loss": 0.2844, "step": 6310 }, { "epoch": 21.76206896551724, "grad_norm": 1.1833189725875854, "learning_rate": 2.609793103448276e-05, "loss": 0.3054, "step": 6311 }, { "epoch": 21.76551724137931, "grad_norm": 1.250594139099121, "learning_rate": 2.6102068965517242e-05, "loss": 0.2406, "step": 6312 }, { "epoch": 21.76896551724138, "grad_norm": 1.261078119277954, "learning_rate": 2.6106206896551724e-05, "loss": 0.2389, "step": 6313 }, { "epoch": 21.77241379310345, "grad_norm": 1.3935353755950928, "learning_rate": 2.611034482758621e-05, "loss": 0.2934, "step": 6314 }, { "epoch": 21.775862068965516, "grad_norm": 1.3462300300598145, "learning_rate": 2.611448275862069e-05, "loss": 0.3797, "step": 6315 }, { "epoch": 21.779310344827586, "grad_norm": 0.9569682478904724, "learning_rate": 2.611862068965517e-05, "loss": 0.3116, "step": 6316 }, { "epoch": 21.782758620689656, "grad_norm": 0.9142968058586121, "learning_rate": 2.6122758620689655e-05, "loss": 0.269, "step": 6317 }, { "epoch": 21.786206896551725, "grad_norm": 0.7770441770553589, "learning_rate": 2.6126896551724137e-05, "loss": 0.2606, "step": 6318 }, { "epoch": 21.78965517241379, "grad_norm": 1.1008206605911255, "learning_rate": 2.6131034482758623e-05, "loss": 0.2777, "step": 6319 }, { "epoch": 21.79310344827586, "grad_norm": 1.6200673580169678, "learning_rate": 2.61351724137931e-05, "loss": 0.2605, "step": 6320 }, { "epoch": 21.79655172413793, "grad_norm": 0.8013526797294617, "learning_rate": 2.6139310344827587e-05, "loss": 0.2806, "step": 6321 }, { "epoch": 21.8, "grad_norm": 0.960000216960907, "learning_rate": 2.614344827586207e-05, "loss": 0.287, "step": 6322 }, { "epoch": 21.80344827586207, "grad_norm": 0.5271003246307373, "learning_rate": 2.6147586206896554e-05, "loss": 0.2538, "step": 6323 }, { "epoch": 21.806896551724137, "grad_norm": 1.1585346460342407, "learning_rate": 2.6151724137931036e-05, "loss": 0.2371, "step": 6324 }, { "epoch": 21.810344827586206, "grad_norm": 0.7694153189659119, "learning_rate": 2.6155862068965518e-05, "loss": 0.2792, "step": 6325 }, { "epoch": 21.813793103448276, "grad_norm": 0.843610405921936, "learning_rate": 2.616e-05, "loss": 0.2498, "step": 6326 }, { "epoch": 21.817241379310346, "grad_norm": 1.0303274393081665, "learning_rate": 2.6164137931034486e-05, "loss": 0.2602, "step": 6327 }, { "epoch": 21.820689655172412, "grad_norm": 0.7553500533103943, "learning_rate": 2.6168275862068968e-05, "loss": 0.2349, "step": 6328 }, { "epoch": 21.824137931034482, "grad_norm": 1.2039530277252197, "learning_rate": 2.6172413793103446e-05, "loss": 0.2765, "step": 6329 }, { "epoch": 21.82758620689655, "grad_norm": 0.6993848085403442, "learning_rate": 2.6176551724137932e-05, "loss": 0.2549, "step": 6330 }, { "epoch": 21.83103448275862, "grad_norm": 1.1517739295959473, "learning_rate": 2.6180689655172414e-05, "loss": 0.2608, "step": 6331 }, { "epoch": 21.83448275862069, "grad_norm": 1.0935813188552856, "learning_rate": 2.61848275862069e-05, "loss": 0.2677, "step": 6332 }, { "epoch": 21.837931034482757, "grad_norm": 1.4944603443145752, "learning_rate": 2.6188965517241378e-05, "loss": 0.2443, "step": 6333 }, { "epoch": 21.841379310344827, "grad_norm": 1.4882864952087402, "learning_rate": 2.6193103448275863e-05, "loss": 0.2449, "step": 6334 }, { "epoch": 21.844827586206897, "grad_norm": 0.8588452339172363, "learning_rate": 2.6197241379310345e-05, "loss": 0.2365, "step": 6335 }, { "epoch": 21.848275862068967, "grad_norm": 1.1028106212615967, "learning_rate": 2.620137931034483e-05, "loss": 0.2851, "step": 6336 }, { "epoch": 21.851724137931036, "grad_norm": 1.4460231065750122, "learning_rate": 2.620551724137931e-05, "loss": 0.2498, "step": 6337 }, { "epoch": 21.855172413793102, "grad_norm": 1.3065990209579468, "learning_rate": 2.6209655172413795e-05, "loss": 0.2659, "step": 6338 }, { "epoch": 21.858620689655172, "grad_norm": 1.2544924020767212, "learning_rate": 2.6213793103448277e-05, "loss": 0.2744, "step": 6339 }, { "epoch": 21.862068965517242, "grad_norm": 1.7459015846252441, "learning_rate": 2.6217931034482762e-05, "loss": 0.3967, "step": 6340 }, { "epoch": 21.86551724137931, "grad_norm": 0.71534663438797, "learning_rate": 2.622206896551724e-05, "loss": 0.3593, "step": 6341 }, { "epoch": 21.868965517241378, "grad_norm": 0.7277801036834717, "learning_rate": 2.6226206896551723e-05, "loss": 0.3144, "step": 6342 }, { "epoch": 21.872413793103448, "grad_norm": 0.7277926802635193, "learning_rate": 2.6230344827586208e-05, "loss": 0.29, "step": 6343 }, { "epoch": 21.875862068965517, "grad_norm": 0.6391090750694275, "learning_rate": 2.623448275862069e-05, "loss": 0.2678, "step": 6344 }, { "epoch": 21.879310344827587, "grad_norm": 0.8498387932777405, "learning_rate": 2.6238620689655172e-05, "loss": 0.256, "step": 6345 }, { "epoch": 21.882758620689657, "grad_norm": 0.7851689457893372, "learning_rate": 2.6242758620689654e-05, "loss": 0.2725, "step": 6346 }, { "epoch": 21.886206896551723, "grad_norm": 0.948388397693634, "learning_rate": 2.624689655172414e-05, "loss": 0.2662, "step": 6347 }, { "epoch": 21.889655172413793, "grad_norm": 1.4125182628631592, "learning_rate": 2.625103448275862e-05, "loss": 0.2947, "step": 6348 }, { "epoch": 21.893103448275863, "grad_norm": 0.677473247051239, "learning_rate": 2.6255172413793104e-05, "loss": 0.2702, "step": 6349 }, { "epoch": 21.896551724137932, "grad_norm": 0.5961211323738098, "learning_rate": 2.6259310344827586e-05, "loss": 0.2677, "step": 6350 }, { "epoch": 21.9, "grad_norm": 0.7396783828735352, "learning_rate": 2.626344827586207e-05, "loss": 0.2549, "step": 6351 }, { "epoch": 21.90344827586207, "grad_norm": 0.9365314245223999, "learning_rate": 2.6267586206896553e-05, "loss": 0.3064, "step": 6352 }, { "epoch": 21.906896551724138, "grad_norm": 0.9441137909889221, "learning_rate": 2.6271724137931035e-05, "loss": 0.2548, "step": 6353 }, { "epoch": 21.910344827586208, "grad_norm": 0.6943730711936951, "learning_rate": 2.6275862068965517e-05, "loss": 0.2338, "step": 6354 }, { "epoch": 21.913793103448278, "grad_norm": 0.8508605360984802, "learning_rate": 2.628e-05, "loss": 0.2419, "step": 6355 }, { "epoch": 21.917241379310344, "grad_norm": 0.7210764288902283, "learning_rate": 2.6284137931034485e-05, "loss": 0.2491, "step": 6356 }, { "epoch": 21.920689655172414, "grad_norm": 1.0079716444015503, "learning_rate": 2.6288275862068967e-05, "loss": 0.2424, "step": 6357 }, { "epoch": 21.924137931034483, "grad_norm": 0.9018118381500244, "learning_rate": 2.629241379310345e-05, "loss": 0.2613, "step": 6358 }, { "epoch": 21.927586206896553, "grad_norm": 0.8291206359863281, "learning_rate": 2.629655172413793e-05, "loss": 0.2327, "step": 6359 }, { "epoch": 21.93103448275862, "grad_norm": 1.1386399269104004, "learning_rate": 2.6300689655172416e-05, "loss": 0.2605, "step": 6360 }, { "epoch": 21.93448275862069, "grad_norm": 0.8577131032943726, "learning_rate": 2.6304827586206898e-05, "loss": 0.2649, "step": 6361 }, { "epoch": 21.93793103448276, "grad_norm": 0.8313112854957581, "learning_rate": 2.630896551724138e-05, "loss": 0.2631, "step": 6362 }, { "epoch": 21.94137931034483, "grad_norm": 0.9683031439781189, "learning_rate": 2.6313103448275862e-05, "loss": 0.2679, "step": 6363 }, { "epoch": 21.944827586206898, "grad_norm": 0.8809060454368591, "learning_rate": 2.6317241379310347e-05, "loss": 0.2854, "step": 6364 }, { "epoch": 21.948275862068964, "grad_norm": 2.582979202270508, "learning_rate": 2.632137931034483e-05, "loss": 0.4376, "step": 6365 }, { "epoch": 21.951724137931034, "grad_norm": 0.8238922357559204, "learning_rate": 2.632551724137931e-05, "loss": 0.3178, "step": 6366 }, { "epoch": 21.955172413793104, "grad_norm": 0.934797465801239, "learning_rate": 2.6329655172413793e-05, "loss": 0.2609, "step": 6367 }, { "epoch": 21.958620689655174, "grad_norm": 0.6468838453292847, "learning_rate": 2.6333793103448276e-05, "loss": 0.2658, "step": 6368 }, { "epoch": 21.96206896551724, "grad_norm": 0.6049526333808899, "learning_rate": 2.633793103448276e-05, "loss": 0.2676, "step": 6369 }, { "epoch": 21.96551724137931, "grad_norm": 0.7617217898368835, "learning_rate": 2.634206896551724e-05, "loss": 0.2444, "step": 6370 }, { "epoch": 21.96896551724138, "grad_norm": 0.7575780153274536, "learning_rate": 2.6346206896551725e-05, "loss": 0.2533, "step": 6371 }, { "epoch": 21.97241379310345, "grad_norm": 0.707607626914978, "learning_rate": 2.6350344827586207e-05, "loss": 0.3244, "step": 6372 }, { "epoch": 21.97586206896552, "grad_norm": 0.8517056107521057, "learning_rate": 2.6354482758620692e-05, "loss": 0.261, "step": 6373 }, { "epoch": 21.979310344827585, "grad_norm": 0.9384645223617554, "learning_rate": 2.635862068965517e-05, "loss": 0.2523, "step": 6374 }, { "epoch": 21.982758620689655, "grad_norm": 0.8221032023429871, "learning_rate": 2.6362758620689656e-05, "loss": 0.251, "step": 6375 }, { "epoch": 21.986206896551725, "grad_norm": 0.6939873099327087, "learning_rate": 2.636689655172414e-05, "loss": 0.2581, "step": 6376 }, { "epoch": 21.989655172413794, "grad_norm": 0.7058740258216858, "learning_rate": 2.6371034482758624e-05, "loss": 0.2822, "step": 6377 }, { "epoch": 21.99310344827586, "grad_norm": 1.4003925323486328, "learning_rate": 2.6375172413793102e-05, "loss": 0.2911, "step": 6378 }, { "epoch": 21.99655172413793, "grad_norm": 0.9932353496551514, "learning_rate": 2.6379310344827588e-05, "loss": 0.2992, "step": 6379 }, { "epoch": 22.0, "grad_norm": 1.2008403539657593, "learning_rate": 2.638344827586207e-05, "loss": 0.3441, "step": 6380 }, { "epoch": 22.00344827586207, "grad_norm": 0.9706447720527649, "learning_rate": 2.6387586206896552e-05, "loss": 0.2859, "step": 6381 }, { "epoch": 22.00689655172414, "grad_norm": 1.0554739236831665, "learning_rate": 2.6391724137931034e-05, "loss": 0.2936, "step": 6382 }, { "epoch": 22.010344827586206, "grad_norm": 0.6314480900764465, "learning_rate": 2.6395862068965516e-05, "loss": 0.2856, "step": 6383 }, { "epoch": 22.013793103448275, "grad_norm": 0.5288445949554443, "learning_rate": 2.64e-05, "loss": 0.2798, "step": 6384 }, { "epoch": 22.017241379310345, "grad_norm": 0.6942000985145569, "learning_rate": 2.6404137931034483e-05, "loss": 0.2386, "step": 6385 }, { "epoch": 22.020689655172415, "grad_norm": 0.6162366271018982, "learning_rate": 2.6408275862068965e-05, "loss": 0.2607, "step": 6386 }, { "epoch": 22.02413793103448, "grad_norm": 0.7102320790290833, "learning_rate": 2.6412413793103447e-05, "loss": 0.2757, "step": 6387 }, { "epoch": 22.02758620689655, "grad_norm": 0.8174973726272583, "learning_rate": 2.6416551724137933e-05, "loss": 0.2981, "step": 6388 }, { "epoch": 22.03103448275862, "grad_norm": 0.5604918599128723, "learning_rate": 2.6420689655172415e-05, "loss": 0.2475, "step": 6389 }, { "epoch": 22.03448275862069, "grad_norm": 0.6291036009788513, "learning_rate": 2.64248275862069e-05, "loss": 0.2426, "step": 6390 }, { "epoch": 22.03793103448276, "grad_norm": 0.8779724836349487, "learning_rate": 2.642896551724138e-05, "loss": 0.2685, "step": 6391 }, { "epoch": 22.041379310344826, "grad_norm": 1.0778110027313232, "learning_rate": 2.6433103448275864e-05, "loss": 0.2584, "step": 6392 }, { "epoch": 22.044827586206896, "grad_norm": 1.0991170406341553, "learning_rate": 2.6437241379310346e-05, "loss": 0.2369, "step": 6393 }, { "epoch": 22.048275862068966, "grad_norm": 0.8424808979034424, "learning_rate": 2.6441379310344828e-05, "loss": 0.2229, "step": 6394 }, { "epoch": 22.051724137931036, "grad_norm": 0.9203599095344543, "learning_rate": 2.644551724137931e-05, "loss": 0.2662, "step": 6395 }, { "epoch": 22.055172413793102, "grad_norm": 0.7336696982383728, "learning_rate": 2.6449655172413792e-05, "loss": 0.2283, "step": 6396 }, { "epoch": 22.05862068965517, "grad_norm": 1.1387189626693726, "learning_rate": 2.6453793103448278e-05, "loss": 0.2441, "step": 6397 }, { "epoch": 22.06206896551724, "grad_norm": 0.928222119808197, "learning_rate": 2.645793103448276e-05, "loss": 0.2282, "step": 6398 }, { "epoch": 22.06551724137931, "grad_norm": 1.409785509109497, "learning_rate": 2.6462068965517242e-05, "loss": 0.2256, "step": 6399 }, { "epoch": 22.06896551724138, "grad_norm": 0.6043685674667358, "learning_rate": 2.6466206896551724e-05, "loss": 0.2144, "step": 6400 }, { "epoch": 22.072413793103447, "grad_norm": 1.1024364233016968, "learning_rate": 2.647034482758621e-05, "loss": 0.259, "step": 6401 }, { "epoch": 22.075862068965517, "grad_norm": 0.8576985597610474, "learning_rate": 2.647448275862069e-05, "loss": 0.2158, "step": 6402 }, { "epoch": 22.079310344827586, "grad_norm": 1.1638288497924805, "learning_rate": 2.6478620689655173e-05, "loss": 0.2235, "step": 6403 }, { "epoch": 22.082758620689656, "grad_norm": 1.076985239982605, "learning_rate": 2.6482758620689655e-05, "loss": 0.2698, "step": 6404 }, { "epoch": 22.086206896551722, "grad_norm": 1.4866307973861694, "learning_rate": 2.648689655172414e-05, "loss": 0.3476, "step": 6405 }, { "epoch": 22.089655172413792, "grad_norm": 0.8457053899765015, "learning_rate": 2.6491034482758623e-05, "loss": 0.3126, "step": 6406 }, { "epoch": 22.093103448275862, "grad_norm": 0.6074900031089783, "learning_rate": 2.64951724137931e-05, "loss": 0.2786, "step": 6407 }, { "epoch": 22.09655172413793, "grad_norm": 1.1623756885528564, "learning_rate": 2.6499310344827587e-05, "loss": 0.2677, "step": 6408 }, { "epoch": 22.1, "grad_norm": 0.654482901096344, "learning_rate": 2.650344827586207e-05, "loss": 0.2305, "step": 6409 }, { "epoch": 22.103448275862068, "grad_norm": 0.8902006149291992, "learning_rate": 2.6507586206896554e-05, "loss": 0.2683, "step": 6410 }, { "epoch": 22.106896551724137, "grad_norm": 1.0655180215835571, "learning_rate": 2.6511724137931033e-05, "loss": 0.2616, "step": 6411 }, { "epoch": 22.110344827586207, "grad_norm": 0.9943771958351135, "learning_rate": 2.6515862068965518e-05, "loss": 0.2802, "step": 6412 }, { "epoch": 22.113793103448277, "grad_norm": 0.702989399433136, "learning_rate": 2.652e-05, "loss": 0.242, "step": 6413 }, { "epoch": 22.117241379310343, "grad_norm": 0.9620406627655029, "learning_rate": 2.6524137931034486e-05, "loss": 0.2389, "step": 6414 }, { "epoch": 22.120689655172413, "grad_norm": 0.532151997089386, "learning_rate": 2.6528275862068964e-05, "loss": 0.2635, "step": 6415 }, { "epoch": 22.124137931034483, "grad_norm": 0.8375515341758728, "learning_rate": 2.653241379310345e-05, "loss": 0.2437, "step": 6416 }, { "epoch": 22.127586206896552, "grad_norm": 0.8030054569244385, "learning_rate": 2.653655172413793e-05, "loss": 0.276, "step": 6417 }, { "epoch": 22.131034482758622, "grad_norm": 1.1019893884658813, "learning_rate": 2.6540689655172417e-05, "loss": 0.2542, "step": 6418 }, { "epoch": 22.13448275862069, "grad_norm": 1.0052223205566406, "learning_rate": 2.6544827586206896e-05, "loss": 0.2582, "step": 6419 }, { "epoch": 22.137931034482758, "grad_norm": 0.8386905789375305, "learning_rate": 2.6548965517241378e-05, "loss": 0.2369, "step": 6420 }, { "epoch": 22.141379310344828, "grad_norm": 0.9052991271018982, "learning_rate": 2.6553103448275863e-05, "loss": 0.2266, "step": 6421 }, { "epoch": 22.144827586206898, "grad_norm": 2.69331693649292, "learning_rate": 2.6557241379310345e-05, "loss": 0.2671, "step": 6422 }, { "epoch": 22.148275862068967, "grad_norm": 1.78923761844635, "learning_rate": 2.656137931034483e-05, "loss": 0.2562, "step": 6423 }, { "epoch": 22.151724137931033, "grad_norm": 0.8250014781951904, "learning_rate": 2.656551724137931e-05, "loss": 0.2605, "step": 6424 }, { "epoch": 22.155172413793103, "grad_norm": 1.1215976476669312, "learning_rate": 2.6569655172413795e-05, "loss": 0.2489, "step": 6425 }, { "epoch": 22.158620689655173, "grad_norm": 1.072502851486206, "learning_rate": 2.6573793103448277e-05, "loss": 0.2534, "step": 6426 }, { "epoch": 22.162068965517243, "grad_norm": 1.6621445417404175, "learning_rate": 2.6577931034482762e-05, "loss": 0.2756, "step": 6427 }, { "epoch": 22.16551724137931, "grad_norm": 1.1100714206695557, "learning_rate": 2.658206896551724e-05, "loss": 0.2628, "step": 6428 }, { "epoch": 22.16896551724138, "grad_norm": 1.181709885597229, "learning_rate": 2.6586206896551726e-05, "loss": 0.3056, "step": 6429 }, { "epoch": 22.17241379310345, "grad_norm": 1.4102427959442139, "learning_rate": 2.6590344827586208e-05, "loss": 0.3435, "step": 6430 }, { "epoch": 22.175862068965518, "grad_norm": 0.6881830096244812, "learning_rate": 2.6594482758620693e-05, "loss": 0.3063, "step": 6431 }, { "epoch": 22.179310344827588, "grad_norm": 0.7921529412269592, "learning_rate": 2.6598620689655172e-05, "loss": 0.2759, "step": 6432 }, { "epoch": 22.182758620689654, "grad_norm": 1.4529705047607422, "learning_rate": 2.6602758620689654e-05, "loss": 0.2963, "step": 6433 }, { "epoch": 22.186206896551724, "grad_norm": 0.7086913585662842, "learning_rate": 2.660689655172414e-05, "loss": 0.2817, "step": 6434 }, { "epoch": 22.189655172413794, "grad_norm": 0.9052650332450867, "learning_rate": 2.661103448275862e-05, "loss": 0.2618, "step": 6435 }, { "epoch": 22.193103448275863, "grad_norm": 0.5428774356842041, "learning_rate": 2.6615172413793104e-05, "loss": 0.2415, "step": 6436 }, { "epoch": 22.19655172413793, "grad_norm": 0.70743328332901, "learning_rate": 2.6619310344827586e-05, "loss": 0.296, "step": 6437 }, { "epoch": 22.2, "grad_norm": 0.6563988924026489, "learning_rate": 2.662344827586207e-05, "loss": 0.2613, "step": 6438 }, { "epoch": 22.20344827586207, "grad_norm": 0.7504197359085083, "learning_rate": 2.6627586206896553e-05, "loss": 0.251, "step": 6439 }, { "epoch": 22.20689655172414, "grad_norm": 0.7597090601921082, "learning_rate": 2.6631724137931035e-05, "loss": 0.2732, "step": 6440 }, { "epoch": 22.21034482758621, "grad_norm": 0.5468843579292297, "learning_rate": 2.6635862068965517e-05, "loss": 0.2153, "step": 6441 }, { "epoch": 22.213793103448275, "grad_norm": 0.800082266330719, "learning_rate": 2.6640000000000002e-05, "loss": 0.2402, "step": 6442 }, { "epoch": 22.217241379310344, "grad_norm": 1.3882917165756226, "learning_rate": 2.6644137931034484e-05, "loss": 0.2488, "step": 6443 }, { "epoch": 22.220689655172414, "grad_norm": 0.7130727171897888, "learning_rate": 2.6648275862068966e-05, "loss": 0.2238, "step": 6444 }, { "epoch": 22.224137931034484, "grad_norm": 0.818480908870697, "learning_rate": 2.665241379310345e-05, "loss": 0.2817, "step": 6445 }, { "epoch": 22.22758620689655, "grad_norm": 0.8641953468322754, "learning_rate": 2.665655172413793e-05, "loss": 0.2472, "step": 6446 }, { "epoch": 22.23103448275862, "grad_norm": 0.6861943006515503, "learning_rate": 2.6660689655172416e-05, "loss": 0.2382, "step": 6447 }, { "epoch": 22.23448275862069, "grad_norm": 1.3482574224472046, "learning_rate": 2.6664827586206895e-05, "loss": 0.2461, "step": 6448 }, { "epoch": 22.23793103448276, "grad_norm": 0.8182105422019958, "learning_rate": 2.666896551724138e-05, "loss": 0.2373, "step": 6449 }, { "epoch": 22.24137931034483, "grad_norm": 1.3251014947891235, "learning_rate": 2.6673103448275862e-05, "loss": 0.243, "step": 6450 }, { "epoch": 22.244827586206895, "grad_norm": 1.1249016523361206, "learning_rate": 2.6677241379310347e-05, "loss": 0.2745, "step": 6451 }, { "epoch": 22.248275862068965, "grad_norm": 2.552072763442993, "learning_rate": 2.6681379310344826e-05, "loss": 0.2295, "step": 6452 }, { "epoch": 22.251724137931035, "grad_norm": 0.8677926063537598, "learning_rate": 2.668551724137931e-05, "loss": 0.2827, "step": 6453 }, { "epoch": 22.255172413793105, "grad_norm": 3.7095062732696533, "learning_rate": 2.6689655172413793e-05, "loss": 0.2906, "step": 6454 }, { "epoch": 22.25862068965517, "grad_norm": 1.8411946296691895, "learning_rate": 2.669379310344828e-05, "loss": 0.3608, "step": 6455 }, { "epoch": 22.26206896551724, "grad_norm": 0.7604774236679077, "learning_rate": 2.669793103448276e-05, "loss": 0.3333, "step": 6456 }, { "epoch": 22.26551724137931, "grad_norm": 0.7086179256439209, "learning_rate": 2.6702068965517243e-05, "loss": 0.2522, "step": 6457 }, { "epoch": 22.26896551724138, "grad_norm": 1.2773363590240479, "learning_rate": 2.6706206896551725e-05, "loss": 0.2686, "step": 6458 }, { "epoch": 22.27241379310345, "grad_norm": 0.9821736812591553, "learning_rate": 2.6710344827586207e-05, "loss": 0.2967, "step": 6459 }, { "epoch": 22.275862068965516, "grad_norm": 1.9738599061965942, "learning_rate": 2.6714482758620692e-05, "loss": 0.2533, "step": 6460 }, { "epoch": 22.279310344827586, "grad_norm": 0.6522148251533508, "learning_rate": 2.671862068965517e-05, "loss": 0.2674, "step": 6461 }, { "epoch": 22.282758620689656, "grad_norm": 0.8570660948753357, "learning_rate": 2.6722758620689656e-05, "loss": 0.2722, "step": 6462 }, { "epoch": 22.286206896551725, "grad_norm": 0.6716949343681335, "learning_rate": 2.672689655172414e-05, "loss": 0.2518, "step": 6463 }, { "epoch": 22.28965517241379, "grad_norm": 0.7349408864974976, "learning_rate": 2.6731034482758624e-05, "loss": 0.2426, "step": 6464 }, { "epoch": 22.29310344827586, "grad_norm": 0.9299097657203674, "learning_rate": 2.6735172413793102e-05, "loss": 0.2478, "step": 6465 }, { "epoch": 22.29655172413793, "grad_norm": 0.8815933465957642, "learning_rate": 2.6739310344827588e-05, "loss": 0.2553, "step": 6466 }, { "epoch": 22.3, "grad_norm": 0.7041778564453125, "learning_rate": 2.674344827586207e-05, "loss": 0.2558, "step": 6467 }, { "epoch": 22.30344827586207, "grad_norm": 0.7833335995674133, "learning_rate": 2.6747586206896555e-05, "loss": 0.2514, "step": 6468 }, { "epoch": 22.306896551724137, "grad_norm": 0.8206170201301575, "learning_rate": 2.6751724137931034e-05, "loss": 0.2638, "step": 6469 }, { "epoch": 22.310344827586206, "grad_norm": 0.8194870948791504, "learning_rate": 2.6755862068965516e-05, "loss": 0.2792, "step": 6470 }, { "epoch": 22.313793103448276, "grad_norm": 0.9092621803283691, "learning_rate": 2.676e-05, "loss": 0.2388, "step": 6471 }, { "epoch": 22.317241379310346, "grad_norm": 0.7519964575767517, "learning_rate": 2.6764137931034483e-05, "loss": 0.2509, "step": 6472 }, { "epoch": 22.320689655172412, "grad_norm": 0.9521816372871399, "learning_rate": 2.6768275862068965e-05, "loss": 0.244, "step": 6473 }, { "epoch": 22.324137931034482, "grad_norm": 0.7747823596000671, "learning_rate": 2.6772413793103447e-05, "loss": 0.2199, "step": 6474 }, { "epoch": 22.32758620689655, "grad_norm": 3.1000816822052, "learning_rate": 2.6776551724137933e-05, "loss": 0.2429, "step": 6475 }, { "epoch": 22.33103448275862, "grad_norm": 1.3188750743865967, "learning_rate": 2.6780689655172415e-05, "loss": 0.258, "step": 6476 }, { "epoch": 22.33448275862069, "grad_norm": 1.3708407878875732, "learning_rate": 2.6784827586206897e-05, "loss": 0.2656, "step": 6477 }, { "epoch": 22.337931034482757, "grad_norm": 2.4429843425750732, "learning_rate": 2.678896551724138e-05, "loss": 0.2784, "step": 6478 }, { "epoch": 22.341379310344827, "grad_norm": 2.304987668991089, "learning_rate": 2.6793103448275864e-05, "loss": 0.26, "step": 6479 }, { "epoch": 22.344827586206897, "grad_norm": 2.1122217178344727, "learning_rate": 2.6797241379310346e-05, "loss": 0.325, "step": 6480 }, { "epoch": 22.348275862068967, "grad_norm": 2.245695114135742, "learning_rate": 2.6801379310344828e-05, "loss": 0.286, "step": 6481 }, { "epoch": 22.351724137931033, "grad_norm": 0.6751993298530579, "learning_rate": 2.680551724137931e-05, "loss": 0.2682, "step": 6482 }, { "epoch": 22.355172413793102, "grad_norm": 1.2615841627120972, "learning_rate": 2.6809655172413792e-05, "loss": 0.2886, "step": 6483 }, { "epoch": 22.358620689655172, "grad_norm": 0.9852524995803833, "learning_rate": 2.6813793103448278e-05, "loss": 0.2956, "step": 6484 }, { "epoch": 22.362068965517242, "grad_norm": 0.6488897800445557, "learning_rate": 2.6817931034482756e-05, "loss": 0.2735, "step": 6485 }, { "epoch": 22.36551724137931, "grad_norm": 0.6548830270767212, "learning_rate": 2.6822068965517242e-05, "loss": 0.2843, "step": 6486 }, { "epoch": 22.368965517241378, "grad_norm": 0.5705157518386841, "learning_rate": 2.6826206896551724e-05, "loss": 0.2418, "step": 6487 }, { "epoch": 22.372413793103448, "grad_norm": 0.6369242668151855, "learning_rate": 2.683034482758621e-05, "loss": 0.2625, "step": 6488 }, { "epoch": 22.375862068965517, "grad_norm": 1.0755356550216675, "learning_rate": 2.6834482758620688e-05, "loss": 0.2529, "step": 6489 }, { "epoch": 22.379310344827587, "grad_norm": 0.767803430557251, "learning_rate": 2.6838620689655173e-05, "loss": 0.2644, "step": 6490 }, { "epoch": 22.382758620689657, "grad_norm": 1.181313157081604, "learning_rate": 2.6842758620689655e-05, "loss": 0.2624, "step": 6491 }, { "epoch": 22.386206896551723, "grad_norm": 1.0235482454299927, "learning_rate": 2.684689655172414e-05, "loss": 0.2704, "step": 6492 }, { "epoch": 22.389655172413793, "grad_norm": 0.9523573517799377, "learning_rate": 2.6851034482758623e-05, "loss": 0.2404, "step": 6493 }, { "epoch": 22.393103448275863, "grad_norm": 0.9975404739379883, "learning_rate": 2.6855172413793105e-05, "loss": 0.2531, "step": 6494 }, { "epoch": 22.396551724137932, "grad_norm": 1.0219305753707886, "learning_rate": 2.6859310344827587e-05, "loss": 0.2483, "step": 6495 }, { "epoch": 22.4, "grad_norm": 0.7047698497772217, "learning_rate": 2.686344827586207e-05, "loss": 0.226, "step": 6496 }, { "epoch": 22.40344827586207, "grad_norm": 0.636716365814209, "learning_rate": 2.6867586206896554e-05, "loss": 0.2379, "step": 6497 }, { "epoch": 22.406896551724138, "grad_norm": 0.8900573253631592, "learning_rate": 2.6871724137931033e-05, "loss": 0.2619, "step": 6498 }, { "epoch": 22.410344827586208, "grad_norm": 0.8086820840835571, "learning_rate": 2.6875862068965518e-05, "loss": 0.2466, "step": 6499 }, { "epoch": 22.413793103448278, "grad_norm": 1.3308192491531372, "learning_rate": 2.688e-05, "loss": 0.266, "step": 6500 }, { "epoch": 22.417241379310344, "grad_norm": 2.228461265563965, "learning_rate": 2.6884137931034486e-05, "loss": 0.239, "step": 6501 }, { "epoch": 22.420689655172414, "grad_norm": 1.1792776584625244, "learning_rate": 2.6888275862068964e-05, "loss": 0.2491, "step": 6502 }, { "epoch": 22.424137931034483, "grad_norm": 1.569554328918457, "learning_rate": 2.689241379310345e-05, "loss": 0.2987, "step": 6503 }, { "epoch": 22.427586206896553, "grad_norm": 1.5952153205871582, "learning_rate": 2.689655172413793e-05, "loss": 0.2615, "step": 6504 }, { "epoch": 22.43103448275862, "grad_norm": 2.667785882949829, "learning_rate": 2.6900689655172417e-05, "loss": 0.3714, "step": 6505 }, { "epoch": 22.43448275862069, "grad_norm": 0.7934789061546326, "learning_rate": 2.6904827586206896e-05, "loss": 0.3403, "step": 6506 }, { "epoch": 22.43793103448276, "grad_norm": 0.6564393639564514, "learning_rate": 2.690896551724138e-05, "loss": 0.2942, "step": 6507 }, { "epoch": 22.44137931034483, "grad_norm": 0.5258413553237915, "learning_rate": 2.6913103448275863e-05, "loss": 0.2765, "step": 6508 }, { "epoch": 22.444827586206898, "grad_norm": 1.0550178289413452, "learning_rate": 2.6917241379310345e-05, "loss": 0.2788, "step": 6509 }, { "epoch": 22.448275862068964, "grad_norm": 0.8345663547515869, "learning_rate": 2.6921379310344827e-05, "loss": 0.2869, "step": 6510 }, { "epoch": 22.451724137931034, "grad_norm": 0.9088352918624878, "learning_rate": 2.692551724137931e-05, "loss": 0.2529, "step": 6511 }, { "epoch": 22.455172413793104, "grad_norm": 0.7649404406547546, "learning_rate": 2.6929655172413795e-05, "loss": 0.2769, "step": 6512 }, { "epoch": 22.458620689655174, "grad_norm": 1.327968716621399, "learning_rate": 2.6933793103448277e-05, "loss": 0.249, "step": 6513 }, { "epoch": 22.46206896551724, "grad_norm": 0.8661448359489441, "learning_rate": 2.693793103448276e-05, "loss": 0.2749, "step": 6514 }, { "epoch": 22.46551724137931, "grad_norm": 1.360645055770874, "learning_rate": 2.694206896551724e-05, "loss": 0.2257, "step": 6515 }, { "epoch": 22.46896551724138, "grad_norm": 1.2890676259994507, "learning_rate": 2.6946206896551726e-05, "loss": 0.2899, "step": 6516 }, { "epoch": 22.47241379310345, "grad_norm": 0.6875988245010376, "learning_rate": 2.6950344827586208e-05, "loss": 0.2579, "step": 6517 }, { "epoch": 22.47586206896552, "grad_norm": 0.9369156360626221, "learning_rate": 2.695448275862069e-05, "loss": 0.2433, "step": 6518 }, { "epoch": 22.479310344827585, "grad_norm": 0.9625588655471802, "learning_rate": 2.6958620689655172e-05, "loss": 0.2795, "step": 6519 }, { "epoch": 22.482758620689655, "grad_norm": 0.9571377038955688, "learning_rate": 2.6962758620689657e-05, "loss": 0.2747, "step": 6520 }, { "epoch": 22.486206896551725, "grad_norm": 1.4188543558120728, "learning_rate": 2.696689655172414e-05, "loss": 0.2215, "step": 6521 }, { "epoch": 22.489655172413794, "grad_norm": 0.9959824681282043, "learning_rate": 2.6971034482758618e-05, "loss": 0.2737, "step": 6522 }, { "epoch": 22.49310344827586, "grad_norm": 1.1787447929382324, "learning_rate": 2.6975172413793103e-05, "loss": 0.2447, "step": 6523 }, { "epoch": 22.49655172413793, "grad_norm": 1.640143632888794, "learning_rate": 2.6979310344827586e-05, "loss": 0.2524, "step": 6524 }, { "epoch": 22.5, "grad_norm": 0.9604923129081726, "learning_rate": 2.698344827586207e-05, "loss": 0.251, "step": 6525 }, { "epoch": 22.50344827586207, "grad_norm": 0.9766765832901001, "learning_rate": 2.6987586206896553e-05, "loss": 0.2178, "step": 6526 }, { "epoch": 22.50689655172414, "grad_norm": 1.9592018127441406, "learning_rate": 2.6991724137931035e-05, "loss": 0.2652, "step": 6527 }, { "epoch": 22.510344827586206, "grad_norm": 0.9413827657699585, "learning_rate": 2.6995862068965517e-05, "loss": 0.2373, "step": 6528 }, { "epoch": 22.513793103448275, "grad_norm": 1.1798737049102783, "learning_rate": 2.7000000000000002e-05, "loss": 0.2378, "step": 6529 }, { "epoch": 22.517241379310345, "grad_norm": 1.6899665594100952, "learning_rate": 2.7004137931034484e-05, "loss": 0.3602, "step": 6530 }, { "epoch": 22.520689655172415, "grad_norm": 0.7984972596168518, "learning_rate": 2.7008275862068966e-05, "loss": 0.3485, "step": 6531 }, { "epoch": 22.52413793103448, "grad_norm": 0.6496527194976807, "learning_rate": 2.701241379310345e-05, "loss": 0.2709, "step": 6532 }, { "epoch": 22.52758620689655, "grad_norm": 0.6768767237663269, "learning_rate": 2.7016551724137934e-05, "loss": 0.2842, "step": 6533 }, { "epoch": 22.53103448275862, "grad_norm": 0.953281581401825, "learning_rate": 2.7020689655172416e-05, "loss": 0.3039, "step": 6534 }, { "epoch": 22.53448275862069, "grad_norm": 0.6631651520729065, "learning_rate": 2.7024827586206894e-05, "loss": 0.2883, "step": 6535 }, { "epoch": 22.53793103448276, "grad_norm": 0.81630539894104, "learning_rate": 2.702896551724138e-05, "loss": 0.2543, "step": 6536 }, { "epoch": 22.541379310344826, "grad_norm": 0.9083176851272583, "learning_rate": 2.7033103448275862e-05, "loss": 0.2758, "step": 6537 }, { "epoch": 22.544827586206896, "grad_norm": 0.7607098817825317, "learning_rate": 2.7037241379310347e-05, "loss": 0.2553, "step": 6538 }, { "epoch": 22.548275862068966, "grad_norm": 0.7970141768455505, "learning_rate": 2.7041379310344826e-05, "loss": 0.2633, "step": 6539 }, { "epoch": 22.551724137931036, "grad_norm": 1.559446930885315, "learning_rate": 2.704551724137931e-05, "loss": 0.2685, "step": 6540 }, { "epoch": 22.555172413793102, "grad_norm": 1.1946451663970947, "learning_rate": 2.7049655172413793e-05, "loss": 0.2583, "step": 6541 }, { "epoch": 22.55862068965517, "grad_norm": 0.9501392841339111, "learning_rate": 2.705379310344828e-05, "loss": 0.2842, "step": 6542 }, { "epoch": 22.56206896551724, "grad_norm": 3.2240943908691406, "learning_rate": 2.7057931034482757e-05, "loss": 0.2585, "step": 6543 }, { "epoch": 22.56551724137931, "grad_norm": 1.587738275527954, "learning_rate": 2.7062068965517243e-05, "loss": 0.259, "step": 6544 }, { "epoch": 22.56896551724138, "grad_norm": 0.7007700204849243, "learning_rate": 2.7066206896551725e-05, "loss": 0.2324, "step": 6545 }, { "epoch": 22.572413793103447, "grad_norm": 3.0468361377716064, "learning_rate": 2.707034482758621e-05, "loss": 0.2646, "step": 6546 }, { "epoch": 22.575862068965517, "grad_norm": 0.829055666923523, "learning_rate": 2.707448275862069e-05, "loss": 0.2379, "step": 6547 }, { "epoch": 22.579310344827586, "grad_norm": 1.2977020740509033, "learning_rate": 2.707862068965517e-05, "loss": 0.2503, "step": 6548 }, { "epoch": 22.582758620689656, "grad_norm": 1.5573668479919434, "learning_rate": 2.7082758620689656e-05, "loss": 0.2905, "step": 6549 }, { "epoch": 22.586206896551722, "grad_norm": 1.2439223527908325, "learning_rate": 2.708689655172414e-05, "loss": 0.2454, "step": 6550 }, { "epoch": 22.589655172413792, "grad_norm": 0.8116556406021118, "learning_rate": 2.709103448275862e-05, "loss": 0.2657, "step": 6551 }, { "epoch": 22.593103448275862, "grad_norm": 3.5307657718658447, "learning_rate": 2.7095172413793102e-05, "loss": 0.2372, "step": 6552 }, { "epoch": 22.59655172413793, "grad_norm": 1.3057465553283691, "learning_rate": 2.7099310344827588e-05, "loss": 0.2434, "step": 6553 }, { "epoch": 22.6, "grad_norm": 0.8331784605979919, "learning_rate": 2.710344827586207e-05, "loss": 0.2588, "step": 6554 }, { "epoch": 22.603448275862068, "grad_norm": 1.4439117908477783, "learning_rate": 2.7107586206896552e-05, "loss": 0.3716, "step": 6555 }, { "epoch": 22.606896551724137, "grad_norm": 0.7168629169464111, "learning_rate": 2.7111724137931034e-05, "loss": 0.3335, "step": 6556 }, { "epoch": 22.610344827586207, "grad_norm": 0.5921469926834106, "learning_rate": 2.711586206896552e-05, "loss": 0.2957, "step": 6557 }, { "epoch": 22.613793103448277, "grad_norm": 0.6149985790252686, "learning_rate": 2.712e-05, "loss": 0.312, "step": 6558 }, { "epoch": 22.617241379310343, "grad_norm": 0.5679067373275757, "learning_rate": 2.7124137931034487e-05, "loss": 0.2619, "step": 6559 }, { "epoch": 22.620689655172413, "grad_norm": 0.7185279130935669, "learning_rate": 2.7128275862068965e-05, "loss": 0.3037, "step": 6560 }, { "epoch": 22.624137931034483, "grad_norm": 1.0648448467254639, "learning_rate": 2.7132413793103447e-05, "loss": 0.2556, "step": 6561 }, { "epoch": 22.627586206896552, "grad_norm": 0.823658287525177, "learning_rate": 2.7136551724137933e-05, "loss": 0.2548, "step": 6562 }, { "epoch": 22.631034482758622, "grad_norm": 0.8322895169258118, "learning_rate": 2.7140689655172415e-05, "loss": 0.246, "step": 6563 }, { "epoch": 22.63448275862069, "grad_norm": 0.7158086895942688, "learning_rate": 2.7144827586206897e-05, "loss": 0.2429, "step": 6564 }, { "epoch": 22.637931034482758, "grad_norm": 0.6044756174087524, "learning_rate": 2.714896551724138e-05, "loss": 0.2336, "step": 6565 }, { "epoch": 22.641379310344828, "grad_norm": 1.1419366598129272, "learning_rate": 2.7153103448275864e-05, "loss": 0.2297, "step": 6566 }, { "epoch": 22.644827586206898, "grad_norm": 0.6634604334831238, "learning_rate": 2.7157241379310346e-05, "loss": 0.2721, "step": 6567 }, { "epoch": 22.648275862068964, "grad_norm": 1.1250768899917603, "learning_rate": 2.7161379310344828e-05, "loss": 0.2195, "step": 6568 }, { "epoch": 22.651724137931033, "grad_norm": 1.61873197555542, "learning_rate": 2.716551724137931e-05, "loss": 0.2522, "step": 6569 }, { "epoch": 22.655172413793103, "grad_norm": 0.960007905960083, "learning_rate": 2.7169655172413796e-05, "loss": 0.2319, "step": 6570 }, { "epoch": 22.658620689655173, "grad_norm": 0.9303498864173889, "learning_rate": 2.7173793103448278e-05, "loss": 0.2552, "step": 6571 }, { "epoch": 22.662068965517243, "grad_norm": 1.288588285446167, "learning_rate": 2.717793103448276e-05, "loss": 0.27, "step": 6572 }, { "epoch": 22.66551724137931, "grad_norm": 1.1068984270095825, "learning_rate": 2.718206896551724e-05, "loss": 0.2485, "step": 6573 }, { "epoch": 22.66896551724138, "grad_norm": 0.7893387079238892, "learning_rate": 2.7186206896551724e-05, "loss": 0.249, "step": 6574 }, { "epoch": 22.67241379310345, "grad_norm": 1.0341452360153198, "learning_rate": 2.719034482758621e-05, "loss": 0.2335, "step": 6575 }, { "epoch": 22.675862068965518, "grad_norm": 1.2555032968521118, "learning_rate": 2.7194482758620688e-05, "loss": 0.2673, "step": 6576 }, { "epoch": 22.679310344827588, "grad_norm": 1.2908093929290771, "learning_rate": 2.7198620689655173e-05, "loss": 0.2718, "step": 6577 }, { "epoch": 22.682758620689654, "grad_norm": 0.8370832800865173, "learning_rate": 2.7202758620689655e-05, "loss": 0.2396, "step": 6578 }, { "epoch": 22.686206896551724, "grad_norm": 1.5214279890060425, "learning_rate": 2.720689655172414e-05, "loss": 0.2616, "step": 6579 }, { "epoch": 22.689655172413794, "grad_norm": 1.5493884086608887, "learning_rate": 2.721103448275862e-05, "loss": 0.3565, "step": 6580 }, { "epoch": 22.693103448275863, "grad_norm": 0.8499820828437805, "learning_rate": 2.7215172413793105e-05, "loss": 0.2952, "step": 6581 }, { "epoch": 22.69655172413793, "grad_norm": 0.7072759866714478, "learning_rate": 2.7219310344827587e-05, "loss": 0.2981, "step": 6582 }, { "epoch": 22.7, "grad_norm": 1.0916701555252075, "learning_rate": 2.7223448275862072e-05, "loss": 0.2546, "step": 6583 }, { "epoch": 22.70344827586207, "grad_norm": 0.5832089781761169, "learning_rate": 2.722758620689655e-05, "loss": 0.2428, "step": 6584 }, { "epoch": 22.70689655172414, "grad_norm": 0.9571926593780518, "learning_rate": 2.7231724137931036e-05, "loss": 0.2636, "step": 6585 }, { "epoch": 22.71034482758621, "grad_norm": 0.6146795749664307, "learning_rate": 2.7235862068965518e-05, "loss": 0.2498, "step": 6586 }, { "epoch": 22.713793103448275, "grad_norm": 5.8644700050354, "learning_rate": 2.724e-05, "loss": 0.2442, "step": 6587 }, { "epoch": 22.717241379310344, "grad_norm": 0.8261671662330627, "learning_rate": 2.7244137931034482e-05, "loss": 0.3055, "step": 6588 }, { "epoch": 22.720689655172414, "grad_norm": 0.650175929069519, "learning_rate": 2.7248275862068964e-05, "loss": 0.2555, "step": 6589 }, { "epoch": 22.724137931034484, "grad_norm": 1.0636366605758667, "learning_rate": 2.725241379310345e-05, "loss": 0.2414, "step": 6590 }, { "epoch": 22.72758620689655, "grad_norm": 0.765870213508606, "learning_rate": 2.725655172413793e-05, "loss": 0.2529, "step": 6591 }, { "epoch": 22.73103448275862, "grad_norm": 0.7074832320213318, "learning_rate": 2.7260689655172417e-05, "loss": 0.2813, "step": 6592 }, { "epoch": 22.73448275862069, "grad_norm": 0.6674700975418091, "learning_rate": 2.7264827586206896e-05, "loss": 0.2508, "step": 6593 }, { "epoch": 22.73793103448276, "grad_norm": 0.8758313059806824, "learning_rate": 2.726896551724138e-05, "loss": 0.2682, "step": 6594 }, { "epoch": 22.74137931034483, "grad_norm": 0.914210855960846, "learning_rate": 2.7273103448275863e-05, "loss": 0.25, "step": 6595 }, { "epoch": 22.744827586206895, "grad_norm": 1.097032904624939, "learning_rate": 2.727724137931035e-05, "loss": 0.227, "step": 6596 }, { "epoch": 22.748275862068965, "grad_norm": 0.6224070191383362, "learning_rate": 2.7281379310344827e-05, "loss": 0.2357, "step": 6597 }, { "epoch": 22.751724137931035, "grad_norm": 1.3664076328277588, "learning_rate": 2.7285517241379312e-05, "loss": 0.2227, "step": 6598 }, { "epoch": 22.755172413793105, "grad_norm": 1.1820017099380493, "learning_rate": 2.7289655172413794e-05, "loss": 0.2358, "step": 6599 }, { "epoch": 22.75862068965517, "grad_norm": 1.0895946025848389, "learning_rate": 2.7293793103448276e-05, "loss": 0.2764, "step": 6600 }, { "epoch": 22.76206896551724, "grad_norm": 0.8455820083618164, "learning_rate": 2.729793103448276e-05, "loss": 0.2638, "step": 6601 }, { "epoch": 22.76551724137931, "grad_norm": 1.142327904701233, "learning_rate": 2.730206896551724e-05, "loss": 0.245, "step": 6602 }, { "epoch": 22.76896551724138, "grad_norm": 0.9139681458473206, "learning_rate": 2.7306206896551726e-05, "loss": 0.2489, "step": 6603 }, { "epoch": 22.77241379310345, "grad_norm": 1.0840227603912354, "learning_rate": 2.7310344827586208e-05, "loss": 0.279, "step": 6604 }, { "epoch": 22.775862068965516, "grad_norm": 1.4696595668792725, "learning_rate": 2.731448275862069e-05, "loss": 0.4025, "step": 6605 }, { "epoch": 22.779310344827586, "grad_norm": 0.7520064115524292, "learning_rate": 2.7318620689655172e-05, "loss": 0.3087, "step": 6606 }, { "epoch": 22.782758620689656, "grad_norm": 0.5878284573554993, "learning_rate": 2.7322758620689657e-05, "loss": 0.2757, "step": 6607 }, { "epoch": 22.786206896551725, "grad_norm": 0.7452014684677124, "learning_rate": 2.732689655172414e-05, "loss": 0.2777, "step": 6608 }, { "epoch": 22.78965517241379, "grad_norm": 1.6345022916793823, "learning_rate": 2.733103448275862e-05, "loss": 0.2805, "step": 6609 }, { "epoch": 22.79310344827586, "grad_norm": 1.234263300895691, "learning_rate": 2.7335172413793103e-05, "loss": 0.2733, "step": 6610 }, { "epoch": 22.79655172413793, "grad_norm": 1.0683348178863525, "learning_rate": 2.733931034482759e-05, "loss": 0.2725, "step": 6611 }, { "epoch": 22.8, "grad_norm": 0.6238831877708435, "learning_rate": 2.734344827586207e-05, "loss": 0.2685, "step": 6612 }, { "epoch": 22.80344827586207, "grad_norm": 1.0860215425491333, "learning_rate": 2.734758620689655e-05, "loss": 0.2711, "step": 6613 }, { "epoch": 22.806896551724137, "grad_norm": 0.9166340231895447, "learning_rate": 2.7351724137931035e-05, "loss": 0.269, "step": 6614 }, { "epoch": 22.810344827586206, "grad_norm": 0.6958682537078857, "learning_rate": 2.7355862068965517e-05, "loss": 0.2581, "step": 6615 }, { "epoch": 22.813793103448276, "grad_norm": 1.1130293607711792, "learning_rate": 2.7360000000000002e-05, "loss": 0.2311, "step": 6616 }, { "epoch": 22.817241379310346, "grad_norm": 0.824988067150116, "learning_rate": 2.736413793103448e-05, "loss": 0.2869, "step": 6617 }, { "epoch": 22.820689655172412, "grad_norm": 1.265612006187439, "learning_rate": 2.7368275862068966e-05, "loss": 0.2314, "step": 6618 }, { "epoch": 22.824137931034482, "grad_norm": 0.8053555488586426, "learning_rate": 2.737241379310345e-05, "loss": 0.2715, "step": 6619 }, { "epoch": 22.82758620689655, "grad_norm": 2.205329418182373, "learning_rate": 2.7376551724137934e-05, "loss": 0.2521, "step": 6620 }, { "epoch": 22.83103448275862, "grad_norm": 1.264069676399231, "learning_rate": 2.7380689655172412e-05, "loss": 0.275, "step": 6621 }, { "epoch": 22.83448275862069, "grad_norm": 3.686694860458374, "learning_rate": 2.7384827586206898e-05, "loss": 0.2625, "step": 6622 }, { "epoch": 22.837931034482757, "grad_norm": 0.8680737018585205, "learning_rate": 2.738896551724138e-05, "loss": 0.2362, "step": 6623 }, { "epoch": 22.841379310344827, "grad_norm": 0.7412057518959045, "learning_rate": 2.7393103448275865e-05, "loss": 0.3027, "step": 6624 }, { "epoch": 22.844827586206897, "grad_norm": 1.7600301504135132, "learning_rate": 2.7397241379310347e-05, "loss": 0.2302, "step": 6625 }, { "epoch": 22.848275862068967, "grad_norm": 0.7624808549880981, "learning_rate": 2.7401379310344826e-05, "loss": 0.2682, "step": 6626 }, { "epoch": 22.851724137931036, "grad_norm": 2.7186949253082275, "learning_rate": 2.740551724137931e-05, "loss": 0.2465, "step": 6627 }, { "epoch": 22.855172413793102, "grad_norm": 1.6806697845458984, "learning_rate": 2.7409655172413793e-05, "loss": 0.2435, "step": 6628 }, { "epoch": 22.858620689655172, "grad_norm": 1.5498625040054321, "learning_rate": 2.741379310344828e-05, "loss": 0.2518, "step": 6629 }, { "epoch": 22.862068965517242, "grad_norm": 2.3926005363464355, "learning_rate": 2.7417931034482757e-05, "loss": 0.3624, "step": 6630 }, { "epoch": 22.86551724137931, "grad_norm": 1.2010033130645752, "learning_rate": 2.7422068965517243e-05, "loss": 0.3606, "step": 6631 }, { "epoch": 22.868965517241378, "grad_norm": 1.1315276622772217, "learning_rate": 2.7426206896551725e-05, "loss": 0.3084, "step": 6632 }, { "epoch": 22.872413793103448, "grad_norm": 0.5962635278701782, "learning_rate": 2.743034482758621e-05, "loss": 0.2561, "step": 6633 }, { "epoch": 22.875862068965517, "grad_norm": 1.28289794921875, "learning_rate": 2.743448275862069e-05, "loss": 0.2854, "step": 6634 }, { "epoch": 22.879310344827587, "grad_norm": 0.6962518095970154, "learning_rate": 2.7438620689655174e-05, "loss": 0.2711, "step": 6635 }, { "epoch": 22.882758620689657, "grad_norm": 1.5229628086090088, "learning_rate": 2.7442758620689656e-05, "loss": 0.2667, "step": 6636 }, { "epoch": 22.886206896551723, "grad_norm": 0.8421207070350647, "learning_rate": 2.7446896551724138e-05, "loss": 0.2708, "step": 6637 }, { "epoch": 22.889655172413793, "grad_norm": 1.4184848070144653, "learning_rate": 2.745103448275862e-05, "loss": 0.2729, "step": 6638 }, { "epoch": 22.893103448275863, "grad_norm": 0.8652958273887634, "learning_rate": 2.7455172413793102e-05, "loss": 0.2581, "step": 6639 }, { "epoch": 22.896551724137932, "grad_norm": 1.163010597229004, "learning_rate": 2.7459310344827588e-05, "loss": 0.2663, "step": 6640 }, { "epoch": 22.9, "grad_norm": 0.6599036455154419, "learning_rate": 2.746344827586207e-05, "loss": 0.2295, "step": 6641 }, { "epoch": 22.90344827586207, "grad_norm": 0.7970104813575745, "learning_rate": 2.7467586206896552e-05, "loss": 0.271, "step": 6642 }, { "epoch": 22.906896551724138, "grad_norm": 0.9137216210365295, "learning_rate": 2.7471724137931034e-05, "loss": 0.2765, "step": 6643 }, { "epoch": 22.910344827586208, "grad_norm": 1.1687418222427368, "learning_rate": 2.747586206896552e-05, "loss": 0.2676, "step": 6644 }, { "epoch": 22.913793103448278, "grad_norm": 1.7108975648880005, "learning_rate": 2.748e-05, "loss": 0.2686, "step": 6645 }, { "epoch": 22.917241379310344, "grad_norm": 0.7769878506660461, "learning_rate": 2.7484137931034483e-05, "loss": 0.2582, "step": 6646 }, { "epoch": 22.920689655172414, "grad_norm": 0.7400926351547241, "learning_rate": 2.7488275862068965e-05, "loss": 0.2472, "step": 6647 }, { "epoch": 22.924137931034483, "grad_norm": 1.0580575466156006, "learning_rate": 2.749241379310345e-05, "loss": 0.2402, "step": 6648 }, { "epoch": 22.927586206896553, "grad_norm": 0.7483630180358887, "learning_rate": 2.7496551724137933e-05, "loss": 0.2763, "step": 6649 }, { "epoch": 22.93103448275862, "grad_norm": 1.8836556673049927, "learning_rate": 2.7500689655172415e-05, "loss": 0.2507, "step": 6650 }, { "epoch": 22.93448275862069, "grad_norm": 1.5904589891433716, "learning_rate": 2.7504827586206897e-05, "loss": 0.3088, "step": 6651 }, { "epoch": 22.93793103448276, "grad_norm": 1.8028382062911987, "learning_rate": 2.750896551724138e-05, "loss": 0.2183, "step": 6652 }, { "epoch": 22.94137931034483, "grad_norm": 0.9939783215522766, "learning_rate": 2.7513103448275864e-05, "loss": 0.2775, "step": 6653 }, { "epoch": 22.944827586206898, "grad_norm": 0.969602108001709, "learning_rate": 2.7517241379310343e-05, "loss": 0.3058, "step": 6654 }, { "epoch": 22.948275862068964, "grad_norm": 2.549001932144165, "learning_rate": 2.7521379310344828e-05, "loss": 0.372, "step": 6655 }, { "epoch": 22.951724137931034, "grad_norm": 1.4089365005493164, "learning_rate": 2.752551724137931e-05, "loss": 0.3095, "step": 6656 }, { "epoch": 22.955172413793104, "grad_norm": 0.7978329062461853, "learning_rate": 2.7529655172413796e-05, "loss": 0.2611, "step": 6657 }, { "epoch": 22.958620689655174, "grad_norm": 0.7662584185600281, "learning_rate": 2.7533793103448278e-05, "loss": 0.259, "step": 6658 }, { "epoch": 22.96206896551724, "grad_norm": 0.9916058778762817, "learning_rate": 2.753793103448276e-05, "loss": 0.2774, "step": 6659 }, { "epoch": 22.96551724137931, "grad_norm": 0.7736018896102905, "learning_rate": 2.754206896551724e-05, "loss": 0.2676, "step": 6660 }, { "epoch": 22.96896551724138, "grad_norm": 1.822123408317566, "learning_rate": 2.7546206896551727e-05, "loss": 0.2359, "step": 6661 }, { "epoch": 22.97241379310345, "grad_norm": 0.9101890325546265, "learning_rate": 2.755034482758621e-05, "loss": 0.2488, "step": 6662 }, { "epoch": 22.97586206896552, "grad_norm": 0.6597132682800293, "learning_rate": 2.7554482758620688e-05, "loss": 0.2561, "step": 6663 }, { "epoch": 22.979310344827585, "grad_norm": 0.7713621258735657, "learning_rate": 2.7558620689655173e-05, "loss": 0.2682, "step": 6664 }, { "epoch": 22.982758620689655, "grad_norm": 0.8398605585098267, "learning_rate": 2.7562758620689655e-05, "loss": 0.2554, "step": 6665 }, { "epoch": 22.986206896551725, "grad_norm": 1.4037744998931885, "learning_rate": 2.756689655172414e-05, "loss": 0.2411, "step": 6666 }, { "epoch": 22.989655172413794, "grad_norm": 0.9124851226806641, "learning_rate": 2.757103448275862e-05, "loss": 0.2406, "step": 6667 }, { "epoch": 22.99310344827586, "grad_norm": 0.9334966540336609, "learning_rate": 2.7575172413793105e-05, "loss": 0.2682, "step": 6668 }, { "epoch": 22.99655172413793, "grad_norm": 1.293317437171936, "learning_rate": 2.7579310344827587e-05, "loss": 0.2828, "step": 6669 }, { "epoch": 23.0, "grad_norm": 2.2019879817962646, "learning_rate": 2.7583448275862072e-05, "loss": 0.3627, "step": 6670 }, { "epoch": 23.00344827586207, "grad_norm": 0.8214565515518188, "learning_rate": 2.758758620689655e-05, "loss": 0.3106, "step": 6671 }, { "epoch": 23.00689655172414, "grad_norm": 0.7620720267295837, "learning_rate": 2.7591724137931036e-05, "loss": 0.2564, "step": 6672 }, { "epoch": 23.010344827586206, "grad_norm": 0.8076344728469849, "learning_rate": 2.7595862068965518e-05, "loss": 0.2954, "step": 6673 }, { "epoch": 23.013793103448275, "grad_norm": 1.052618145942688, "learning_rate": 2.7600000000000003e-05, "loss": 0.2585, "step": 6674 }, { "epoch": 23.017241379310345, "grad_norm": 0.7261592745780945, "learning_rate": 2.7604137931034482e-05, "loss": 0.2693, "step": 6675 }, { "epoch": 23.020689655172415, "grad_norm": 0.9127947688102722, "learning_rate": 2.7608275862068964e-05, "loss": 0.2267, "step": 6676 }, { "epoch": 23.02413793103448, "grad_norm": 1.4783129692077637, "learning_rate": 2.761241379310345e-05, "loss": 0.2454, "step": 6677 }, { "epoch": 23.02758620689655, "grad_norm": 1.4849419593811035, "learning_rate": 2.761655172413793e-05, "loss": 0.2977, "step": 6678 }, { "epoch": 23.03103448275862, "grad_norm": 0.874177873134613, "learning_rate": 2.7620689655172413e-05, "loss": 0.2265, "step": 6679 }, { "epoch": 23.03448275862069, "grad_norm": 1.2015142440795898, "learning_rate": 2.7624827586206896e-05, "loss": 0.2269, "step": 6680 }, { "epoch": 23.03793103448276, "grad_norm": 1.1847277879714966, "learning_rate": 2.762896551724138e-05, "loss": 0.2707, "step": 6681 }, { "epoch": 23.041379310344826, "grad_norm": 0.7105205059051514, "learning_rate": 2.7633103448275863e-05, "loss": 0.2493, "step": 6682 }, { "epoch": 23.044827586206896, "grad_norm": 1.2354472875595093, "learning_rate": 2.7637241379310345e-05, "loss": 0.2253, "step": 6683 }, { "epoch": 23.048275862068966, "grad_norm": 0.8711403608322144, "learning_rate": 2.7641379310344827e-05, "loss": 0.2771, "step": 6684 }, { "epoch": 23.051724137931036, "grad_norm": 0.7427960634231567, "learning_rate": 2.7645517241379312e-05, "loss": 0.2394, "step": 6685 }, { "epoch": 23.055172413793102, "grad_norm": 1.2197039127349854, "learning_rate": 2.7649655172413794e-05, "loss": 0.2413, "step": 6686 }, { "epoch": 23.05862068965517, "grad_norm": 0.8575798869132996, "learning_rate": 2.7653793103448276e-05, "loss": 0.2367, "step": 6687 }, { "epoch": 23.06206896551724, "grad_norm": 1.0365952253341675, "learning_rate": 2.765793103448276e-05, "loss": 0.2555, "step": 6688 }, { "epoch": 23.06551724137931, "grad_norm": 1.067115068435669, "learning_rate": 2.766206896551724e-05, "loss": 0.2159, "step": 6689 }, { "epoch": 23.06896551724138, "grad_norm": 0.9836422204971313, "learning_rate": 2.7666206896551726e-05, "loss": 0.2489, "step": 6690 }, { "epoch": 23.072413793103447, "grad_norm": 0.9978858828544617, "learning_rate": 2.7670344827586208e-05, "loss": 0.2467, "step": 6691 }, { "epoch": 23.075862068965517, "grad_norm": 1.3798561096191406, "learning_rate": 2.767448275862069e-05, "loss": 0.2307, "step": 6692 }, { "epoch": 23.079310344827586, "grad_norm": 1.0726737976074219, "learning_rate": 2.7678620689655172e-05, "loss": 0.2388, "step": 6693 }, { "epoch": 23.082758620689656, "grad_norm": 1.2641788721084595, "learning_rate": 2.7682758620689657e-05, "loss": 0.2959, "step": 6694 }, { "epoch": 23.086206896551722, "grad_norm": 1.5370502471923828, "learning_rate": 2.768689655172414e-05, "loss": 0.3191, "step": 6695 }, { "epoch": 23.089655172413792, "grad_norm": 0.8540544509887695, "learning_rate": 2.769103448275862e-05, "loss": 0.3368, "step": 6696 }, { "epoch": 23.093103448275862, "grad_norm": 1.1547361612319946, "learning_rate": 2.7695172413793103e-05, "loss": 0.3019, "step": 6697 }, { "epoch": 23.09655172413793, "grad_norm": 1.0225762128829956, "learning_rate": 2.769931034482759e-05, "loss": 0.2856, "step": 6698 }, { "epoch": 23.1, "grad_norm": 0.7312541007995605, "learning_rate": 2.770344827586207e-05, "loss": 0.2574, "step": 6699 }, { "epoch": 23.103448275862068, "grad_norm": 0.7103402018547058, "learning_rate": 2.7707586206896553e-05, "loss": 0.2723, "step": 6700 }, { "epoch": 23.106896551724137, "grad_norm": 1.0412535667419434, "learning_rate": 2.7711724137931035e-05, "loss": 0.2621, "step": 6701 }, { "epoch": 23.110344827586207, "grad_norm": 1.119908094406128, "learning_rate": 2.7715862068965517e-05, "loss": 0.248, "step": 6702 }, { "epoch": 23.113793103448277, "grad_norm": 1.3441805839538574, "learning_rate": 2.7720000000000002e-05, "loss": 0.2861, "step": 6703 }, { "epoch": 23.117241379310343, "grad_norm": 0.8824370503425598, "learning_rate": 2.772413793103448e-05, "loss": 0.2557, "step": 6704 }, { "epoch": 23.120689655172413, "grad_norm": 0.7054877877235413, "learning_rate": 2.7728275862068966e-05, "loss": 0.2312, "step": 6705 }, { "epoch": 23.124137931034483, "grad_norm": 0.6463795900344849, "learning_rate": 2.773241379310345e-05, "loss": 0.2692, "step": 6706 }, { "epoch": 23.127586206896552, "grad_norm": 0.7931952476501465, "learning_rate": 2.7736551724137934e-05, "loss": 0.2575, "step": 6707 }, { "epoch": 23.131034482758622, "grad_norm": 0.6407735347747803, "learning_rate": 2.7740689655172412e-05, "loss": 0.2529, "step": 6708 }, { "epoch": 23.13448275862069, "grad_norm": 0.9805240631103516, "learning_rate": 2.7744827586206898e-05, "loss": 0.2355, "step": 6709 }, { "epoch": 23.137931034482758, "grad_norm": 1.4086030721664429, "learning_rate": 2.774896551724138e-05, "loss": 0.2818, "step": 6710 }, { "epoch": 23.141379310344828, "grad_norm": 1.4571796655654907, "learning_rate": 2.7753103448275865e-05, "loss": 0.2317, "step": 6711 }, { "epoch": 23.144827586206898, "grad_norm": 1.3841092586517334, "learning_rate": 2.7757241379310344e-05, "loss": 0.2579, "step": 6712 }, { "epoch": 23.148275862068967, "grad_norm": 1.0702755451202393, "learning_rate": 2.776137931034483e-05, "loss": 0.2207, "step": 6713 }, { "epoch": 23.151724137931033, "grad_norm": 1.1152905225753784, "learning_rate": 2.776551724137931e-05, "loss": 0.2559, "step": 6714 }, { "epoch": 23.155172413793103, "grad_norm": 0.6790586709976196, "learning_rate": 2.7769655172413793e-05, "loss": 0.2079, "step": 6715 }, { "epoch": 23.158620689655173, "grad_norm": 1.0344818830490112, "learning_rate": 2.7773793103448275e-05, "loss": 0.2458, "step": 6716 }, { "epoch": 23.162068965517243, "grad_norm": 1.0286650657653809, "learning_rate": 2.7777931034482757e-05, "loss": 0.2214, "step": 6717 }, { "epoch": 23.16551724137931, "grad_norm": 1.697938323020935, "learning_rate": 2.7782068965517243e-05, "loss": 0.23, "step": 6718 }, { "epoch": 23.16896551724138, "grad_norm": 1.3748259544372559, "learning_rate": 2.7786206896551725e-05, "loss": 0.2811, "step": 6719 }, { "epoch": 23.17241379310345, "grad_norm": 1.9976835250854492, "learning_rate": 2.7790344827586207e-05, "loss": 0.335, "step": 6720 }, { "epoch": 23.175862068965518, "grad_norm": 1.2675127983093262, "learning_rate": 2.779448275862069e-05, "loss": 0.4, "step": 6721 }, { "epoch": 23.179310344827588, "grad_norm": 0.671818196773529, "learning_rate": 2.7798620689655174e-05, "loss": 0.2346, "step": 6722 }, { "epoch": 23.182758620689654, "grad_norm": 0.5354052186012268, "learning_rate": 2.7802758620689656e-05, "loss": 0.2556, "step": 6723 }, { "epoch": 23.186206896551724, "grad_norm": 2.874044895172119, "learning_rate": 2.780689655172414e-05, "loss": 0.2641, "step": 6724 }, { "epoch": 23.189655172413794, "grad_norm": 1.3525220155715942, "learning_rate": 2.781103448275862e-05, "loss": 0.2768, "step": 6725 }, { "epoch": 23.193103448275863, "grad_norm": 0.6790944337844849, "learning_rate": 2.7815172413793106e-05, "loss": 0.2586, "step": 6726 }, { "epoch": 23.19655172413793, "grad_norm": 1.605414867401123, "learning_rate": 2.7819310344827588e-05, "loss": 0.2529, "step": 6727 }, { "epoch": 23.2, "grad_norm": 0.7195449471473694, "learning_rate": 2.782344827586207e-05, "loss": 0.2432, "step": 6728 }, { "epoch": 23.20344827586207, "grad_norm": 0.7571009993553162, "learning_rate": 2.782758620689655e-05, "loss": 0.2329, "step": 6729 }, { "epoch": 23.20689655172414, "grad_norm": 1.0303163528442383, "learning_rate": 2.7831724137931034e-05, "loss": 0.2623, "step": 6730 }, { "epoch": 23.21034482758621, "grad_norm": 0.7895858883857727, "learning_rate": 2.783586206896552e-05, "loss": 0.2608, "step": 6731 }, { "epoch": 23.213793103448275, "grad_norm": 0.5794463753700256, "learning_rate": 2.784e-05, "loss": 0.2611, "step": 6732 }, { "epoch": 23.217241379310344, "grad_norm": 0.6917489767074585, "learning_rate": 2.7844137931034483e-05, "loss": 0.248, "step": 6733 }, { "epoch": 23.220689655172414, "grad_norm": 0.9150142669677734, "learning_rate": 2.7848275862068965e-05, "loss": 0.2434, "step": 6734 }, { "epoch": 23.224137931034484, "grad_norm": 0.8743758797645569, "learning_rate": 2.785241379310345e-05, "loss": 0.2606, "step": 6735 }, { "epoch": 23.22758620689655, "grad_norm": 0.7529441118240356, "learning_rate": 2.7856551724137933e-05, "loss": 0.2417, "step": 6736 }, { "epoch": 23.23103448275862, "grad_norm": 0.9205895662307739, "learning_rate": 2.7860689655172415e-05, "loss": 0.2439, "step": 6737 }, { "epoch": 23.23448275862069, "grad_norm": 0.8860118985176086, "learning_rate": 2.7864827586206897e-05, "loss": 0.2275, "step": 6738 }, { "epoch": 23.23793103448276, "grad_norm": 1.2232022285461426, "learning_rate": 2.7868965517241382e-05, "loss": 0.2456, "step": 6739 }, { "epoch": 23.24137931034483, "grad_norm": 1.2915459871292114, "learning_rate": 2.7873103448275864e-05, "loss": 0.2616, "step": 6740 }, { "epoch": 23.244827586206895, "grad_norm": 0.8227635622024536, "learning_rate": 2.7877241379310343e-05, "loss": 0.2739, "step": 6741 }, { "epoch": 23.248275862068965, "grad_norm": 0.8605478405952454, "learning_rate": 2.7881379310344828e-05, "loss": 0.2063, "step": 6742 }, { "epoch": 23.251724137931035, "grad_norm": 0.7211580276489258, "learning_rate": 2.788551724137931e-05, "loss": 0.2369, "step": 6743 }, { "epoch": 23.255172413793105, "grad_norm": 1.3456679582595825, "learning_rate": 2.7889655172413795e-05, "loss": 0.2308, "step": 6744 }, { "epoch": 23.25862068965517, "grad_norm": 1.6617447137832642, "learning_rate": 2.7893793103448274e-05, "loss": 0.3179, "step": 6745 }, { "epoch": 23.26206896551724, "grad_norm": 0.9170164465904236, "learning_rate": 2.789793103448276e-05, "loss": 0.3247, "step": 6746 }, { "epoch": 23.26551724137931, "grad_norm": 0.8270319104194641, "learning_rate": 2.790206896551724e-05, "loss": 0.2871, "step": 6747 }, { "epoch": 23.26896551724138, "grad_norm": 1.9256871938705444, "learning_rate": 2.7906206896551727e-05, "loss": 0.2518, "step": 6748 }, { "epoch": 23.27241379310345, "grad_norm": 1.0060434341430664, "learning_rate": 2.7910344827586206e-05, "loss": 0.2806, "step": 6749 }, { "epoch": 23.275862068965516, "grad_norm": 0.9432069063186646, "learning_rate": 2.791448275862069e-05, "loss": 0.2581, "step": 6750 }, { "epoch": 23.279310344827586, "grad_norm": 0.9936709403991699, "learning_rate": 2.7918620689655173e-05, "loss": 0.2598, "step": 6751 }, { "epoch": 23.282758620689656, "grad_norm": 0.5943319201469421, "learning_rate": 2.792275862068966e-05, "loss": 0.2576, "step": 6752 }, { "epoch": 23.286206896551725, "grad_norm": 1.0612859725952148, "learning_rate": 2.7926896551724137e-05, "loss": 0.2637, "step": 6753 }, { "epoch": 23.28965517241379, "grad_norm": 0.7621653079986572, "learning_rate": 2.793103448275862e-05, "loss": 0.2492, "step": 6754 }, { "epoch": 23.29310344827586, "grad_norm": 1.4831514358520508, "learning_rate": 2.7935172413793104e-05, "loss": 0.269, "step": 6755 }, { "epoch": 23.29655172413793, "grad_norm": 0.6153885126113892, "learning_rate": 2.7939310344827586e-05, "loss": 0.2542, "step": 6756 }, { "epoch": 23.3, "grad_norm": 0.7397609353065491, "learning_rate": 2.7943448275862072e-05, "loss": 0.2835, "step": 6757 }, { "epoch": 23.30344827586207, "grad_norm": 0.7996677160263062, "learning_rate": 2.794758620689655e-05, "loss": 0.2673, "step": 6758 }, { "epoch": 23.306896551724137, "grad_norm": 0.7425705790519714, "learning_rate": 2.7951724137931036e-05, "loss": 0.2273, "step": 6759 }, { "epoch": 23.310344827586206, "grad_norm": 0.6591310501098633, "learning_rate": 2.7955862068965518e-05, "loss": 0.2534, "step": 6760 }, { "epoch": 23.313793103448276, "grad_norm": 1.122663974761963, "learning_rate": 2.7960000000000003e-05, "loss": 0.2549, "step": 6761 }, { "epoch": 23.317241379310346, "grad_norm": 1.00774347782135, "learning_rate": 2.7964137931034482e-05, "loss": 0.2657, "step": 6762 }, { "epoch": 23.320689655172412, "grad_norm": 1.8909214735031128, "learning_rate": 2.7968275862068967e-05, "loss": 0.2321, "step": 6763 }, { "epoch": 23.324137931034482, "grad_norm": 1.9442802667617798, "learning_rate": 2.797241379310345e-05, "loss": 0.2335, "step": 6764 }, { "epoch": 23.32758620689655, "grad_norm": 1.1370829343795776, "learning_rate": 2.7976551724137935e-05, "loss": 0.2472, "step": 6765 }, { "epoch": 23.33103448275862, "grad_norm": 0.9460655450820923, "learning_rate": 2.7980689655172413e-05, "loss": 0.2395, "step": 6766 }, { "epoch": 23.33448275862069, "grad_norm": 0.8410382866859436, "learning_rate": 2.7984827586206895e-05, "loss": 0.2345, "step": 6767 }, { "epoch": 23.337931034482757, "grad_norm": 1.1176934242248535, "learning_rate": 2.798896551724138e-05, "loss": 0.246, "step": 6768 }, { "epoch": 23.341379310344827, "grad_norm": 1.8761570453643799, "learning_rate": 2.7993103448275863e-05, "loss": 0.2826, "step": 6769 }, { "epoch": 23.344827586206897, "grad_norm": 1.4706177711486816, "learning_rate": 2.7997241379310345e-05, "loss": 0.3547, "step": 6770 }, { "epoch": 23.348275862068967, "grad_norm": 0.606780469417572, "learning_rate": 2.8001379310344827e-05, "loss": 0.3339, "step": 6771 }, { "epoch": 23.351724137931033, "grad_norm": 0.6213313937187195, "learning_rate": 2.8005517241379312e-05, "loss": 0.3218, "step": 6772 }, { "epoch": 23.355172413793102, "grad_norm": 0.48544541001319885, "learning_rate": 2.8009655172413794e-05, "loss": 0.2595, "step": 6773 }, { "epoch": 23.358620689655172, "grad_norm": 2.3094868659973145, "learning_rate": 2.8013793103448276e-05, "loss": 0.2861, "step": 6774 }, { "epoch": 23.362068965517242, "grad_norm": 0.73713219165802, "learning_rate": 2.801793103448276e-05, "loss": 0.26, "step": 6775 }, { "epoch": 23.36551724137931, "grad_norm": 0.7507039308547974, "learning_rate": 2.8022068965517244e-05, "loss": 0.2776, "step": 6776 }, { "epoch": 23.368965517241378, "grad_norm": 0.8621233105659485, "learning_rate": 2.8026206896551726e-05, "loss": 0.2417, "step": 6777 }, { "epoch": 23.372413793103448, "grad_norm": 0.6694278717041016, "learning_rate": 2.8030344827586208e-05, "loss": 0.2593, "step": 6778 }, { "epoch": 23.375862068965517, "grad_norm": 0.6034671068191528, "learning_rate": 2.803448275862069e-05, "loss": 0.256, "step": 6779 }, { "epoch": 23.379310344827587, "grad_norm": 0.531989336013794, "learning_rate": 2.8038620689655172e-05, "loss": 0.2132, "step": 6780 }, { "epoch": 23.382758620689657, "grad_norm": 0.6075157523155212, "learning_rate": 2.8042758620689657e-05, "loss": 0.239, "step": 6781 }, { "epoch": 23.386206896551723, "grad_norm": 0.8109020590782166, "learning_rate": 2.8046896551724136e-05, "loss": 0.2824, "step": 6782 }, { "epoch": 23.389655172413793, "grad_norm": 0.8694270849227905, "learning_rate": 2.805103448275862e-05, "loss": 0.2154, "step": 6783 }, { "epoch": 23.393103448275863, "grad_norm": 1.547438621520996, "learning_rate": 2.8055172413793103e-05, "loss": 0.2528, "step": 6784 }, { "epoch": 23.396551724137932, "grad_norm": 1.924088954925537, "learning_rate": 2.805931034482759e-05, "loss": 0.2503, "step": 6785 }, { "epoch": 23.4, "grad_norm": 0.8839056491851807, "learning_rate": 2.8063448275862067e-05, "loss": 0.2328, "step": 6786 }, { "epoch": 23.40344827586207, "grad_norm": 0.7248367667198181, "learning_rate": 2.8067586206896553e-05, "loss": 0.2329, "step": 6787 }, { "epoch": 23.406896551724138, "grad_norm": 0.9687523245811462, "learning_rate": 2.8071724137931035e-05, "loss": 0.2238, "step": 6788 }, { "epoch": 23.410344827586208, "grad_norm": 1.8875548839569092, "learning_rate": 2.807586206896552e-05, "loss": 0.2596, "step": 6789 }, { "epoch": 23.413793103448278, "grad_norm": 1.1257742643356323, "learning_rate": 2.8080000000000002e-05, "loss": 0.24, "step": 6790 }, { "epoch": 23.417241379310344, "grad_norm": 0.8459873199462891, "learning_rate": 2.8084137931034484e-05, "loss": 0.2611, "step": 6791 }, { "epoch": 23.420689655172414, "grad_norm": 0.9903050065040588, "learning_rate": 2.8088275862068966e-05, "loss": 0.2545, "step": 6792 }, { "epoch": 23.424137931034483, "grad_norm": 1.0216537714004517, "learning_rate": 2.8092413793103448e-05, "loss": 0.2232, "step": 6793 }, { "epoch": 23.427586206896553, "grad_norm": 1.5337754487991333, "learning_rate": 2.8096551724137934e-05, "loss": 0.2893, "step": 6794 }, { "epoch": 23.43103448275862, "grad_norm": 1.6188820600509644, "learning_rate": 2.8100689655172412e-05, "loss": 0.3419, "step": 6795 }, { "epoch": 23.43448275862069, "grad_norm": 0.5548383593559265, "learning_rate": 2.8104827586206898e-05, "loss": 0.3004, "step": 6796 }, { "epoch": 23.43793103448276, "grad_norm": 0.5510181188583374, "learning_rate": 2.810896551724138e-05, "loss": 0.2656, "step": 6797 }, { "epoch": 23.44137931034483, "grad_norm": 0.48377344012260437, "learning_rate": 2.8113103448275865e-05, "loss": 0.2644, "step": 6798 }, { "epoch": 23.444827586206898, "grad_norm": 1.3875393867492676, "learning_rate": 2.8117241379310344e-05, "loss": 0.2389, "step": 6799 }, { "epoch": 23.448275862068964, "grad_norm": 0.684660792350769, "learning_rate": 2.812137931034483e-05, "loss": 0.2667, "step": 6800 }, { "epoch": 23.451724137931034, "grad_norm": 1.0957437753677368, "learning_rate": 2.812551724137931e-05, "loss": 0.2608, "step": 6801 }, { "epoch": 23.455172413793104, "grad_norm": 1.3010938167572021, "learning_rate": 2.8129655172413797e-05, "loss": 0.2485, "step": 6802 }, { "epoch": 23.458620689655174, "grad_norm": 0.7992758750915527, "learning_rate": 2.8133793103448275e-05, "loss": 0.2468, "step": 6803 }, { "epoch": 23.46206896551724, "grad_norm": 0.643406867980957, "learning_rate": 2.813793103448276e-05, "loss": 0.2348, "step": 6804 }, { "epoch": 23.46551724137931, "grad_norm": 0.6625092029571533, "learning_rate": 2.8142068965517243e-05, "loss": 0.2299, "step": 6805 }, { "epoch": 23.46896551724138, "grad_norm": 0.7734584808349609, "learning_rate": 2.8146206896551725e-05, "loss": 0.2491, "step": 6806 }, { "epoch": 23.47241379310345, "grad_norm": 0.7644239664077759, "learning_rate": 2.8150344827586207e-05, "loss": 0.2636, "step": 6807 }, { "epoch": 23.47586206896552, "grad_norm": 0.6607473492622375, "learning_rate": 2.815448275862069e-05, "loss": 0.2609, "step": 6808 }, { "epoch": 23.479310344827585, "grad_norm": 0.6328778862953186, "learning_rate": 2.8158620689655174e-05, "loss": 0.2494, "step": 6809 }, { "epoch": 23.482758620689655, "grad_norm": 0.6766360998153687, "learning_rate": 2.8162758620689656e-05, "loss": 0.2132, "step": 6810 }, { "epoch": 23.486206896551725, "grad_norm": 0.8412011861801147, "learning_rate": 2.8166896551724138e-05, "loss": 0.2234, "step": 6811 }, { "epoch": 23.489655172413794, "grad_norm": 0.7516939640045166, "learning_rate": 2.817103448275862e-05, "loss": 0.2256, "step": 6812 }, { "epoch": 23.49310344827586, "grad_norm": 0.7073161602020264, "learning_rate": 2.8175172413793106e-05, "loss": 0.2495, "step": 6813 }, { "epoch": 23.49655172413793, "grad_norm": 0.8247694969177246, "learning_rate": 2.8179310344827588e-05, "loss": 0.239, "step": 6814 }, { "epoch": 23.5, "grad_norm": 0.9124959707260132, "learning_rate": 2.818344827586207e-05, "loss": 0.2298, "step": 6815 }, { "epoch": 23.50344827586207, "grad_norm": 1.2590405941009521, "learning_rate": 2.818758620689655e-05, "loss": 0.245, "step": 6816 }, { "epoch": 23.50689655172414, "grad_norm": 0.8281029462814331, "learning_rate": 2.8191724137931034e-05, "loss": 0.2588, "step": 6817 }, { "epoch": 23.510344827586206, "grad_norm": 1.0102320909500122, "learning_rate": 2.819586206896552e-05, "loss": 0.2573, "step": 6818 }, { "epoch": 23.513793103448275, "grad_norm": 1.262884259223938, "learning_rate": 2.8199999999999998e-05, "loss": 0.2745, "step": 6819 }, { "epoch": 23.517241379310345, "grad_norm": 1.4635465145111084, "learning_rate": 2.8204137931034483e-05, "loss": 0.3917, "step": 6820 }, { "epoch": 23.520689655172415, "grad_norm": 0.5478545427322388, "learning_rate": 2.8208275862068965e-05, "loss": 0.2772, "step": 6821 }, { "epoch": 23.52413793103448, "grad_norm": 1.1570477485656738, "learning_rate": 2.821241379310345e-05, "loss": 0.2973, "step": 6822 }, { "epoch": 23.52758620689655, "grad_norm": 1.0859284400939941, "learning_rate": 2.8216551724137933e-05, "loss": 0.3141, "step": 6823 }, { "epoch": 23.53103448275862, "grad_norm": 0.5431835651397705, "learning_rate": 2.8220689655172415e-05, "loss": 0.2538, "step": 6824 }, { "epoch": 23.53448275862069, "grad_norm": 0.8737290501594543, "learning_rate": 2.8224827586206897e-05, "loss": 0.2571, "step": 6825 }, { "epoch": 23.53793103448276, "grad_norm": 1.067776083946228, "learning_rate": 2.8228965517241382e-05, "loss": 0.2896, "step": 6826 }, { "epoch": 23.541379310344826, "grad_norm": 0.7169477343559265, "learning_rate": 2.8233103448275864e-05, "loss": 0.2661, "step": 6827 }, { "epoch": 23.544827586206896, "grad_norm": 0.6355435848236084, "learning_rate": 2.8237241379310346e-05, "loss": 0.2361, "step": 6828 }, { "epoch": 23.548275862068966, "grad_norm": 0.7846996188163757, "learning_rate": 2.8241379310344828e-05, "loss": 0.2647, "step": 6829 }, { "epoch": 23.551724137931036, "grad_norm": 2.240196704864502, "learning_rate": 2.824551724137931e-05, "loss": 0.2299, "step": 6830 }, { "epoch": 23.555172413793102, "grad_norm": 0.5487576127052307, "learning_rate": 2.8249655172413795e-05, "loss": 0.2513, "step": 6831 }, { "epoch": 23.55862068965517, "grad_norm": 0.7109092473983765, "learning_rate": 2.8253793103448274e-05, "loss": 0.2433, "step": 6832 }, { "epoch": 23.56206896551724, "grad_norm": 0.507500410079956, "learning_rate": 2.825793103448276e-05, "loss": 0.2182, "step": 6833 }, { "epoch": 23.56551724137931, "grad_norm": 0.8293949961662292, "learning_rate": 2.826206896551724e-05, "loss": 0.2495, "step": 6834 }, { "epoch": 23.56896551724138, "grad_norm": 0.6422624588012695, "learning_rate": 2.8266206896551727e-05, "loss": 0.2295, "step": 6835 }, { "epoch": 23.572413793103447, "grad_norm": 1.5158624649047852, "learning_rate": 2.8270344827586206e-05, "loss": 0.248, "step": 6836 }, { "epoch": 23.575862068965517, "grad_norm": 0.7640183568000793, "learning_rate": 2.827448275862069e-05, "loss": 0.2269, "step": 6837 }, { "epoch": 23.579310344827586, "grad_norm": 0.736403226852417, "learning_rate": 2.8278620689655173e-05, "loss": 0.2356, "step": 6838 }, { "epoch": 23.582758620689656, "grad_norm": 0.7684931755065918, "learning_rate": 2.828275862068966e-05, "loss": 0.2465, "step": 6839 }, { "epoch": 23.586206896551722, "grad_norm": 0.5862461924552917, "learning_rate": 2.8286896551724137e-05, "loss": 0.2192, "step": 6840 }, { "epoch": 23.589655172413792, "grad_norm": 1.0485610961914062, "learning_rate": 2.8291034482758622e-05, "loss": 0.258, "step": 6841 }, { "epoch": 23.593103448275862, "grad_norm": 1.5077234506607056, "learning_rate": 2.8295172413793104e-05, "loss": 0.2568, "step": 6842 }, { "epoch": 23.59655172413793, "grad_norm": 2.0046982765197754, "learning_rate": 2.8299310344827586e-05, "loss": 0.2391, "step": 6843 }, { "epoch": 23.6, "grad_norm": 2.904313325881958, "learning_rate": 2.830344827586207e-05, "loss": 0.2495, "step": 6844 }, { "epoch": 23.603448275862068, "grad_norm": 1.7831108570098877, "learning_rate": 2.830758620689655e-05, "loss": 0.3373, "step": 6845 }, { "epoch": 23.606896551724137, "grad_norm": 0.49623528122901917, "learning_rate": 2.8311724137931036e-05, "loss": 0.2612, "step": 6846 }, { "epoch": 23.610344827586207, "grad_norm": 0.6853732466697693, "learning_rate": 2.8315862068965518e-05, "loss": 0.2542, "step": 6847 }, { "epoch": 23.613793103448277, "grad_norm": 0.5340944528579712, "learning_rate": 2.832e-05, "loss": 0.2506, "step": 6848 }, { "epoch": 23.617241379310343, "grad_norm": 0.609679639339447, "learning_rate": 2.8324137931034482e-05, "loss": 0.2591, "step": 6849 }, { "epoch": 23.620689655172413, "grad_norm": 0.6071009635925293, "learning_rate": 2.8328275862068967e-05, "loss": 0.2604, "step": 6850 }, { "epoch": 23.624137931034483, "grad_norm": 0.5960924029350281, "learning_rate": 2.833241379310345e-05, "loss": 0.2773, "step": 6851 }, { "epoch": 23.627586206896552, "grad_norm": 0.5852025151252747, "learning_rate": 2.833655172413793e-05, "loss": 0.273, "step": 6852 }, { "epoch": 23.631034482758622, "grad_norm": 0.6509747505187988, "learning_rate": 2.8340689655172413e-05, "loss": 0.2748, "step": 6853 }, { "epoch": 23.63448275862069, "grad_norm": 0.594007134437561, "learning_rate": 2.83448275862069e-05, "loss": 0.2314, "step": 6854 }, { "epoch": 23.637931034482758, "grad_norm": 1.0017858743667603, "learning_rate": 2.834896551724138e-05, "loss": 0.2921, "step": 6855 }, { "epoch": 23.641379310344828, "grad_norm": 0.8883485198020935, "learning_rate": 2.8353103448275863e-05, "loss": 0.2586, "step": 6856 }, { "epoch": 23.644827586206898, "grad_norm": 0.8172231912612915, "learning_rate": 2.8357241379310345e-05, "loss": 0.2456, "step": 6857 }, { "epoch": 23.648275862068964, "grad_norm": 0.6990904808044434, "learning_rate": 2.8361379310344827e-05, "loss": 0.214, "step": 6858 }, { "epoch": 23.651724137931033, "grad_norm": 0.9442504048347473, "learning_rate": 2.8365517241379312e-05, "loss": 0.2443, "step": 6859 }, { "epoch": 23.655172413793103, "grad_norm": 0.934100329875946, "learning_rate": 2.8369655172413794e-05, "loss": 0.2566, "step": 6860 }, { "epoch": 23.658620689655173, "grad_norm": 0.7060658931732178, "learning_rate": 2.8373793103448276e-05, "loss": 0.2453, "step": 6861 }, { "epoch": 23.662068965517243, "grad_norm": 1.2682795524597168, "learning_rate": 2.837793103448276e-05, "loss": 0.2104, "step": 6862 }, { "epoch": 23.66551724137931, "grad_norm": 1.3542782068252563, "learning_rate": 2.8382068965517244e-05, "loss": 0.2525, "step": 6863 }, { "epoch": 23.66896551724138, "grad_norm": 1.3304672241210938, "learning_rate": 2.8386206896551726e-05, "loss": 0.217, "step": 6864 }, { "epoch": 23.67241379310345, "grad_norm": 1.0433545112609863, "learning_rate": 2.8390344827586208e-05, "loss": 0.2437, "step": 6865 }, { "epoch": 23.675862068965518, "grad_norm": 1.0311851501464844, "learning_rate": 2.839448275862069e-05, "loss": 0.2801, "step": 6866 }, { "epoch": 23.679310344827588, "grad_norm": 6.829832077026367, "learning_rate": 2.8398620689655175e-05, "loss": 0.2464, "step": 6867 }, { "epoch": 23.682758620689654, "grad_norm": 1.1470890045166016, "learning_rate": 2.8402758620689657e-05, "loss": 0.2851, "step": 6868 }, { "epoch": 23.686206896551724, "grad_norm": 4.621748924255371, "learning_rate": 2.8406896551724136e-05, "loss": 0.2628, "step": 6869 }, { "epoch": 23.689655172413794, "grad_norm": 3.0906612873077393, "learning_rate": 2.841103448275862e-05, "loss": 0.3407, "step": 6870 }, { "epoch": 23.693103448275863, "grad_norm": 0.7013457417488098, "learning_rate": 2.8415172413793103e-05, "loss": 0.3009, "step": 6871 }, { "epoch": 23.69655172413793, "grad_norm": 1.2077949047088623, "learning_rate": 2.841931034482759e-05, "loss": 0.2618, "step": 6872 }, { "epoch": 23.7, "grad_norm": 0.5682526230812073, "learning_rate": 2.8423448275862067e-05, "loss": 0.2841, "step": 6873 }, { "epoch": 23.70344827586207, "grad_norm": 0.9809613227844238, "learning_rate": 2.8427586206896553e-05, "loss": 0.2632, "step": 6874 }, { "epoch": 23.70689655172414, "grad_norm": 0.6703615188598633, "learning_rate": 2.8431724137931035e-05, "loss": 0.2579, "step": 6875 }, { "epoch": 23.71034482758621, "grad_norm": 0.564156711101532, "learning_rate": 2.843586206896552e-05, "loss": 0.2597, "step": 6876 }, { "epoch": 23.713793103448275, "grad_norm": 0.9772621989250183, "learning_rate": 2.844e-05, "loss": 0.2607, "step": 6877 }, { "epoch": 23.717241379310344, "grad_norm": 0.660474956035614, "learning_rate": 2.8444137931034484e-05, "loss": 0.2319, "step": 6878 }, { "epoch": 23.720689655172414, "grad_norm": 0.6482490301132202, "learning_rate": 2.8448275862068966e-05, "loss": 0.2394, "step": 6879 }, { "epoch": 23.724137931034484, "grad_norm": 0.567969024181366, "learning_rate": 2.845241379310345e-05, "loss": 0.2425, "step": 6880 }, { "epoch": 23.72758620689655, "grad_norm": 1.0691009759902954, "learning_rate": 2.845655172413793e-05, "loss": 0.2236, "step": 6881 }, { "epoch": 23.73103448275862, "grad_norm": 0.7207668423652649, "learning_rate": 2.8460689655172412e-05, "loss": 0.2821, "step": 6882 }, { "epoch": 23.73448275862069, "grad_norm": 1.0581786632537842, "learning_rate": 2.8464827586206898e-05, "loss": 0.2511, "step": 6883 }, { "epoch": 23.73793103448276, "grad_norm": 2.0133581161499023, "learning_rate": 2.846896551724138e-05, "loss": 0.2306, "step": 6884 }, { "epoch": 23.74137931034483, "grad_norm": 1.0369880199432373, "learning_rate": 2.847310344827586e-05, "loss": 0.2216, "step": 6885 }, { "epoch": 23.744827586206895, "grad_norm": 0.6266427636146545, "learning_rate": 2.8477241379310344e-05, "loss": 0.2633, "step": 6886 }, { "epoch": 23.748275862068965, "grad_norm": 0.858310878276825, "learning_rate": 2.848137931034483e-05, "loss": 0.2206, "step": 6887 }, { "epoch": 23.751724137931035, "grad_norm": 1.206257939338684, "learning_rate": 2.848551724137931e-05, "loss": 0.2555, "step": 6888 }, { "epoch": 23.755172413793105, "grad_norm": 1.3814499378204346, "learning_rate": 2.8489655172413797e-05, "loss": 0.2153, "step": 6889 }, { "epoch": 23.75862068965517, "grad_norm": 1.073694109916687, "learning_rate": 2.8493793103448275e-05, "loss": 0.2588, "step": 6890 }, { "epoch": 23.76206896551724, "grad_norm": 0.8656123876571655, "learning_rate": 2.849793103448276e-05, "loss": 0.2592, "step": 6891 }, { "epoch": 23.76551724137931, "grad_norm": 1.3848843574523926, "learning_rate": 2.8502068965517243e-05, "loss": 0.2419, "step": 6892 }, { "epoch": 23.76896551724138, "grad_norm": 1.1902644634246826, "learning_rate": 2.8506206896551728e-05, "loss": 0.2458, "step": 6893 }, { "epoch": 23.77241379310345, "grad_norm": 1.3075443506240845, "learning_rate": 2.8510344827586207e-05, "loss": 0.2619, "step": 6894 }, { "epoch": 23.775862068965516, "grad_norm": 1.6069400310516357, "learning_rate": 2.851448275862069e-05, "loss": 0.3453, "step": 6895 }, { "epoch": 23.779310344827586, "grad_norm": 0.6705116629600525, "learning_rate": 2.8518620689655174e-05, "loss": 0.3418, "step": 6896 }, { "epoch": 23.782758620689656, "grad_norm": 0.6424195766448975, "learning_rate": 2.8522758620689656e-05, "loss": 0.2782, "step": 6897 }, { "epoch": 23.786206896551725, "grad_norm": 0.6966106295585632, "learning_rate": 2.8526896551724138e-05, "loss": 0.2838, "step": 6898 }, { "epoch": 23.78965517241379, "grad_norm": 0.9441041350364685, "learning_rate": 2.853103448275862e-05, "loss": 0.2665, "step": 6899 }, { "epoch": 23.79310344827586, "grad_norm": 1.0217431783676147, "learning_rate": 2.8535172413793105e-05, "loss": 0.2373, "step": 6900 }, { "epoch": 23.79655172413793, "grad_norm": 0.983432412147522, "learning_rate": 2.8539310344827588e-05, "loss": 0.2709, "step": 6901 }, { "epoch": 23.8, "grad_norm": 0.7428366541862488, "learning_rate": 2.854344827586207e-05, "loss": 0.2977, "step": 6902 }, { "epoch": 23.80344827586207, "grad_norm": 0.8002331256866455, "learning_rate": 2.854758620689655e-05, "loss": 0.2663, "step": 6903 }, { "epoch": 23.806896551724137, "grad_norm": 1.6581593751907349, "learning_rate": 2.8551724137931037e-05, "loss": 0.2492, "step": 6904 }, { "epoch": 23.810344827586206, "grad_norm": 0.5696295499801636, "learning_rate": 2.855586206896552e-05, "loss": 0.2581, "step": 6905 }, { "epoch": 23.813793103448276, "grad_norm": 0.566615641117096, "learning_rate": 2.856e-05, "loss": 0.2103, "step": 6906 }, { "epoch": 23.817241379310346, "grad_norm": 0.6402053833007812, "learning_rate": 2.8564137931034483e-05, "loss": 0.219, "step": 6907 }, { "epoch": 23.820689655172412, "grad_norm": 0.7147437334060669, "learning_rate": 2.8568275862068965e-05, "loss": 0.2345, "step": 6908 }, { "epoch": 23.824137931034482, "grad_norm": 2.0350475311279297, "learning_rate": 2.857241379310345e-05, "loss": 0.2469, "step": 6909 }, { "epoch": 23.82758620689655, "grad_norm": 1.7249763011932373, "learning_rate": 2.857655172413793e-05, "loss": 0.2605, "step": 6910 }, { "epoch": 23.83103448275862, "grad_norm": 1.0047948360443115, "learning_rate": 2.8580689655172414e-05, "loss": 0.2192, "step": 6911 }, { "epoch": 23.83448275862069, "grad_norm": 1.0755339860916138, "learning_rate": 2.8584827586206896e-05, "loss": 0.2397, "step": 6912 }, { "epoch": 23.837931034482757, "grad_norm": 1.2579450607299805, "learning_rate": 2.8588965517241382e-05, "loss": 0.2589, "step": 6913 }, { "epoch": 23.841379310344827, "grad_norm": 0.7618259191513062, "learning_rate": 2.859310344827586e-05, "loss": 0.2394, "step": 6914 }, { "epoch": 23.844827586206897, "grad_norm": 1.2540944814682007, "learning_rate": 2.8597241379310346e-05, "loss": 0.2434, "step": 6915 }, { "epoch": 23.848275862068967, "grad_norm": 1.1287634372711182, "learning_rate": 2.8601379310344828e-05, "loss": 0.2604, "step": 6916 }, { "epoch": 23.851724137931036, "grad_norm": 2.115175724029541, "learning_rate": 2.8605517241379313e-05, "loss": 0.2472, "step": 6917 }, { "epoch": 23.855172413793102, "grad_norm": 0.8222931027412415, "learning_rate": 2.8609655172413792e-05, "loss": 0.2519, "step": 6918 }, { "epoch": 23.858620689655172, "grad_norm": 1.0833200216293335, "learning_rate": 2.8613793103448277e-05, "loss": 0.2584, "step": 6919 }, { "epoch": 23.862068965517242, "grad_norm": 1.2781329154968262, "learning_rate": 2.861793103448276e-05, "loss": 0.3572, "step": 6920 }, { "epoch": 23.86551724137931, "grad_norm": 0.6893566846847534, "learning_rate": 2.862206896551724e-05, "loss": 0.2778, "step": 6921 }, { "epoch": 23.868965517241378, "grad_norm": 1.3950384855270386, "learning_rate": 2.8626206896551727e-05, "loss": 0.2963, "step": 6922 }, { "epoch": 23.872413793103448, "grad_norm": 0.8682111501693726, "learning_rate": 2.8630344827586205e-05, "loss": 0.3054, "step": 6923 }, { "epoch": 23.875862068965517, "grad_norm": 0.6937564611434937, "learning_rate": 2.863448275862069e-05, "loss": 0.2832, "step": 6924 }, { "epoch": 23.879310344827587, "grad_norm": 1.0145578384399414, "learning_rate": 2.8638620689655173e-05, "loss": 0.2888, "step": 6925 }, { "epoch": 23.882758620689657, "grad_norm": 0.7913023829460144, "learning_rate": 2.8642758620689658e-05, "loss": 0.2715, "step": 6926 }, { "epoch": 23.886206896551723, "grad_norm": 0.8669017553329468, "learning_rate": 2.8646896551724137e-05, "loss": 0.2574, "step": 6927 }, { "epoch": 23.889655172413793, "grad_norm": 1.4897282123565674, "learning_rate": 2.8651034482758622e-05, "loss": 0.2646, "step": 6928 }, { "epoch": 23.893103448275863, "grad_norm": 1.741740345954895, "learning_rate": 2.8655172413793104e-05, "loss": 0.274, "step": 6929 }, { "epoch": 23.896551724137932, "grad_norm": 1.0542888641357422, "learning_rate": 2.865931034482759e-05, "loss": 0.2344, "step": 6930 }, { "epoch": 23.9, "grad_norm": 1.407447338104248, "learning_rate": 2.866344827586207e-05, "loss": 0.2653, "step": 6931 }, { "epoch": 23.90344827586207, "grad_norm": 0.7747410535812378, "learning_rate": 2.8667586206896554e-05, "loss": 0.2361, "step": 6932 }, { "epoch": 23.906896551724138, "grad_norm": 2.0012624263763428, "learning_rate": 2.8671724137931036e-05, "loss": 0.2676, "step": 6933 }, { "epoch": 23.910344827586208, "grad_norm": 4.343747615814209, "learning_rate": 2.8675862068965518e-05, "loss": 0.2794, "step": 6934 }, { "epoch": 23.913793103448278, "grad_norm": 0.8451603055000305, "learning_rate": 2.868e-05, "loss": 0.2885, "step": 6935 }, { "epoch": 23.917241379310344, "grad_norm": 0.9026473760604858, "learning_rate": 2.8684137931034482e-05, "loss": 0.2453, "step": 6936 }, { "epoch": 23.920689655172414, "grad_norm": 0.9332893490791321, "learning_rate": 2.8688275862068967e-05, "loss": 0.2565, "step": 6937 }, { "epoch": 23.924137931034483, "grad_norm": 0.8753110766410828, "learning_rate": 2.869241379310345e-05, "loss": 0.2268, "step": 6938 }, { "epoch": 23.927586206896553, "grad_norm": 1.2952479124069214, "learning_rate": 2.869655172413793e-05, "loss": 0.2785, "step": 6939 }, { "epoch": 23.93103448275862, "grad_norm": 1.0207210779190063, "learning_rate": 2.8700689655172413e-05, "loss": 0.2601, "step": 6940 }, { "epoch": 23.93448275862069, "grad_norm": 0.8533781170845032, "learning_rate": 2.87048275862069e-05, "loss": 0.2631, "step": 6941 }, { "epoch": 23.93793103448276, "grad_norm": 1.6711424589157104, "learning_rate": 2.870896551724138e-05, "loss": 0.2551, "step": 6942 }, { "epoch": 23.94137931034483, "grad_norm": 2.0856566429138184, "learning_rate": 2.8713103448275863e-05, "loss": 0.2679, "step": 6943 }, { "epoch": 23.944827586206898, "grad_norm": 0.990643322467804, "learning_rate": 2.8717241379310345e-05, "loss": 0.304, "step": 6944 }, { "epoch": 23.948275862068964, "grad_norm": 1.321299433708191, "learning_rate": 2.872137931034483e-05, "loss": 0.3361, "step": 6945 }, { "epoch": 23.951724137931034, "grad_norm": 0.9210853576660156, "learning_rate": 2.8725517241379312e-05, "loss": 0.2937, "step": 6946 }, { "epoch": 23.955172413793104, "grad_norm": 0.6701274514198303, "learning_rate": 2.872965517241379e-05, "loss": 0.2685, "step": 6947 }, { "epoch": 23.958620689655174, "grad_norm": 2.229593276977539, "learning_rate": 2.8733793103448276e-05, "loss": 0.27, "step": 6948 }, { "epoch": 23.96206896551724, "grad_norm": 0.8646601438522339, "learning_rate": 2.8737931034482758e-05, "loss": 0.2461, "step": 6949 }, { "epoch": 23.96551724137931, "grad_norm": 1.625968337059021, "learning_rate": 2.8742068965517244e-05, "loss": 0.2794, "step": 6950 }, { "epoch": 23.96896551724138, "grad_norm": 1.6076620817184448, "learning_rate": 2.8746206896551722e-05, "loss": 0.2428, "step": 6951 }, { "epoch": 23.97241379310345, "grad_norm": 1.2254390716552734, "learning_rate": 2.8750344827586208e-05, "loss": 0.2434, "step": 6952 }, { "epoch": 23.97586206896552, "grad_norm": 0.8506723642349243, "learning_rate": 2.875448275862069e-05, "loss": 0.2575, "step": 6953 }, { "epoch": 23.979310344827585, "grad_norm": 1.7319377660751343, "learning_rate": 2.8758620689655175e-05, "loss": 0.2569, "step": 6954 }, { "epoch": 23.982758620689655, "grad_norm": 1.7370927333831787, "learning_rate": 2.8762758620689657e-05, "loss": 0.2943, "step": 6955 }, { "epoch": 23.986206896551725, "grad_norm": 4.504233360290527, "learning_rate": 2.876689655172414e-05, "loss": 0.2651, "step": 6956 }, { "epoch": 23.989655172413794, "grad_norm": 1.4749962091445923, "learning_rate": 2.877103448275862e-05, "loss": 0.2439, "step": 6957 }, { "epoch": 23.99310344827586, "grad_norm": 0.9933512210845947, "learning_rate": 2.8775172413793107e-05, "loss": 0.2639, "step": 6958 }, { "epoch": 23.99655172413793, "grad_norm": 0.9153459072113037, "learning_rate": 2.877931034482759e-05, "loss": 0.2445, "step": 6959 }, { "epoch": 24.0, "grad_norm": 1.4378445148468018, "learning_rate": 2.8783448275862067e-05, "loss": 0.341, "step": 6960 }, { "epoch": 24.00344827586207, "grad_norm": 0.772930383682251, "learning_rate": 2.8787586206896553e-05, "loss": 0.3063, "step": 6961 }, { "epoch": 24.00689655172414, "grad_norm": 0.549319863319397, "learning_rate": 2.8791724137931035e-05, "loss": 0.2655, "step": 6962 }, { "epoch": 24.010344827586206, "grad_norm": 1.0306190252304077, "learning_rate": 2.879586206896552e-05, "loss": 0.2767, "step": 6963 }, { "epoch": 24.013793103448275, "grad_norm": 0.6933546662330627, "learning_rate": 2.88e-05, "loss": 0.2875, "step": 6964 }, { "epoch": 24.017241379310345, "grad_norm": 0.9043781757354736, "learning_rate": 2.8804137931034484e-05, "loss": 0.2437, "step": 6965 }, { "epoch": 24.020689655172415, "grad_norm": 1.085426688194275, "learning_rate": 2.8808275862068966e-05, "loss": 0.2692, "step": 6966 }, { "epoch": 24.02413793103448, "grad_norm": 0.62013840675354, "learning_rate": 2.881241379310345e-05, "loss": 0.2306, "step": 6967 }, { "epoch": 24.02758620689655, "grad_norm": 1.000314474105835, "learning_rate": 2.881655172413793e-05, "loss": 0.2839, "step": 6968 }, { "epoch": 24.03103448275862, "grad_norm": 0.7582994699478149, "learning_rate": 2.8820689655172416e-05, "loss": 0.2338, "step": 6969 }, { "epoch": 24.03448275862069, "grad_norm": 1.0553979873657227, "learning_rate": 2.8824827586206898e-05, "loss": 0.2195, "step": 6970 }, { "epoch": 24.03793103448276, "grad_norm": 1.4907267093658447, "learning_rate": 2.8828965517241383e-05, "loss": 0.222, "step": 6971 }, { "epoch": 24.041379310344826, "grad_norm": 0.7282674312591553, "learning_rate": 2.883310344827586e-05, "loss": 0.2658, "step": 6972 }, { "epoch": 24.044827586206896, "grad_norm": 0.7774999141693115, "learning_rate": 2.8837241379310344e-05, "loss": 0.2401, "step": 6973 }, { "epoch": 24.048275862068966, "grad_norm": 0.971320390701294, "learning_rate": 2.884137931034483e-05, "loss": 0.2429, "step": 6974 }, { "epoch": 24.051724137931036, "grad_norm": 1.0045320987701416, "learning_rate": 2.884551724137931e-05, "loss": 0.2683, "step": 6975 }, { "epoch": 24.055172413793102, "grad_norm": 0.863885223865509, "learning_rate": 2.8849655172413793e-05, "loss": 0.2295, "step": 6976 }, { "epoch": 24.05862068965517, "grad_norm": 0.8582373857498169, "learning_rate": 2.8853793103448275e-05, "loss": 0.2665, "step": 6977 }, { "epoch": 24.06206896551724, "grad_norm": 1.304238200187683, "learning_rate": 2.885793103448276e-05, "loss": 0.2273, "step": 6978 }, { "epoch": 24.06551724137931, "grad_norm": 1.3993669748306274, "learning_rate": 2.8862068965517243e-05, "loss": 0.247, "step": 6979 }, { "epoch": 24.06896551724138, "grad_norm": 1.0023002624511719, "learning_rate": 2.8866206896551725e-05, "loss": 0.2355, "step": 6980 }, { "epoch": 24.072413793103447, "grad_norm": 1.3944891691207886, "learning_rate": 2.8870344827586207e-05, "loss": 0.2462, "step": 6981 }, { "epoch": 24.075862068965517, "grad_norm": 0.9159520268440247, "learning_rate": 2.8874482758620692e-05, "loss": 0.215, "step": 6982 }, { "epoch": 24.079310344827586, "grad_norm": 1.120379090309143, "learning_rate": 2.8878620689655174e-05, "loss": 0.2757, "step": 6983 }, { "epoch": 24.082758620689656, "grad_norm": 2.616450309753418, "learning_rate": 2.8882758620689656e-05, "loss": 0.319, "step": 6984 }, { "epoch": 24.086206896551722, "grad_norm": 2.16424298286438, "learning_rate": 2.8886896551724138e-05, "loss": 0.3238, "step": 6985 }, { "epoch": 24.089655172413792, "grad_norm": 1.2206875085830688, "learning_rate": 2.889103448275862e-05, "loss": 0.2811, "step": 6986 }, { "epoch": 24.093103448275862, "grad_norm": 0.9083728790283203, "learning_rate": 2.8895172413793105e-05, "loss": 0.2619, "step": 6987 }, { "epoch": 24.09655172413793, "grad_norm": 0.7412471771240234, "learning_rate": 2.8899310344827587e-05, "loss": 0.2665, "step": 6988 }, { "epoch": 24.1, "grad_norm": 0.6648778319358826, "learning_rate": 2.890344827586207e-05, "loss": 0.273, "step": 6989 }, { "epoch": 24.103448275862068, "grad_norm": 0.9222948551177979, "learning_rate": 2.890758620689655e-05, "loss": 0.2586, "step": 6990 }, { "epoch": 24.106896551724137, "grad_norm": 0.5752288103103638, "learning_rate": 2.8911724137931037e-05, "loss": 0.2316, "step": 6991 }, { "epoch": 24.110344827586207, "grad_norm": 0.673531174659729, "learning_rate": 2.891586206896552e-05, "loss": 0.2551, "step": 6992 }, { "epoch": 24.113793103448277, "grad_norm": 0.829002857208252, "learning_rate": 2.892e-05, "loss": 0.2877, "step": 6993 }, { "epoch": 24.117241379310343, "grad_norm": 0.9722888469696045, "learning_rate": 2.8924137931034483e-05, "loss": 0.2331, "step": 6994 }, { "epoch": 24.120689655172413, "grad_norm": 1.1573691368103027, "learning_rate": 2.892827586206897e-05, "loss": 0.2389, "step": 6995 }, { "epoch": 24.124137931034483, "grad_norm": 1.3253463506698608, "learning_rate": 2.893241379310345e-05, "loss": 0.2589, "step": 6996 }, { "epoch": 24.127586206896552, "grad_norm": 0.7688792943954468, "learning_rate": 2.8936551724137932e-05, "loss": 0.2187, "step": 6997 }, { "epoch": 24.131034482758622, "grad_norm": 1.09597647190094, "learning_rate": 2.8940689655172414e-05, "loss": 0.2266, "step": 6998 }, { "epoch": 24.13448275862069, "grad_norm": 0.644669771194458, "learning_rate": 2.8944827586206896e-05, "loss": 0.2375, "step": 6999 }, { "epoch": 24.137931034482758, "grad_norm": 1.3652786016464233, "learning_rate": 2.8948965517241382e-05, "loss": 0.2367, "step": 7000 }, { "epoch": 24.137931034482758, "eval_cer": 0.1344880022680997, "eval_loss": 0.3196215033531189, "eval_runtime": 20.141, "eval_samples_per_second": 46.026, "eval_steps_per_second": 0.149, "eval_wer": 0.3192287784679089, "step": 7000 }, { "epoch": 24.141379310344828, "grad_norm": 0.8725858330726624, "learning_rate": 2.895310344827586e-05, "loss": 0.2277, "step": 7001 }, { "epoch": 24.144827586206898, "grad_norm": 0.9545244574546814, "learning_rate": 2.8957241379310346e-05, "loss": 0.2275, "step": 7002 }, { "epoch": 24.148275862068967, "grad_norm": 1.501220464706421, "learning_rate": 2.8961379310344828e-05, "loss": 0.2401, "step": 7003 }, { "epoch": 24.151724137931033, "grad_norm": 0.805770754814148, "learning_rate": 2.8965517241379313e-05, "loss": 0.2361, "step": 7004 }, { "epoch": 24.155172413793103, "grad_norm": 0.8572880625724792, "learning_rate": 2.8969655172413792e-05, "loss": 0.2331, "step": 7005 }, { "epoch": 24.158620689655173, "grad_norm": 1.0751203298568726, "learning_rate": 2.8973793103448277e-05, "loss": 0.2695, "step": 7006 }, { "epoch": 24.162068965517243, "grad_norm": 1.079248070716858, "learning_rate": 2.897793103448276e-05, "loss": 0.2313, "step": 7007 }, { "epoch": 24.16551724137931, "grad_norm": 16.30780601501465, "learning_rate": 2.8982068965517245e-05, "loss": 0.2528, "step": 7008 }, { "epoch": 24.16896551724138, "grad_norm": 1.1631823778152466, "learning_rate": 2.8986206896551723e-05, "loss": 0.2782, "step": 7009 }, { "epoch": 24.17241379310345, "grad_norm": 2.092905282974243, "learning_rate": 2.8990344827586205e-05, "loss": 0.3533, "step": 7010 }, { "epoch": 24.175862068965518, "grad_norm": 0.6124144792556763, "learning_rate": 2.899448275862069e-05, "loss": 0.3017, "step": 7011 }, { "epoch": 24.179310344827588, "grad_norm": 1.3806830644607544, "learning_rate": 2.8998620689655173e-05, "loss": 0.2744, "step": 7012 }, { "epoch": 24.182758620689654, "grad_norm": 0.6620877981185913, "learning_rate": 2.9002758620689655e-05, "loss": 0.2667, "step": 7013 }, { "epoch": 24.186206896551724, "grad_norm": 0.9629511833190918, "learning_rate": 2.9006896551724137e-05, "loss": 0.2605, "step": 7014 }, { "epoch": 24.189655172413794, "grad_norm": 1.04558265209198, "learning_rate": 2.9011034482758622e-05, "loss": 0.2472, "step": 7015 }, { "epoch": 24.193103448275863, "grad_norm": 0.6545566916465759, "learning_rate": 2.9015172413793104e-05, "loss": 0.2588, "step": 7016 }, { "epoch": 24.19655172413793, "grad_norm": 1.0592929124832153, "learning_rate": 2.9019310344827586e-05, "loss": 0.2551, "step": 7017 }, { "epoch": 24.2, "grad_norm": 1.185994029045105, "learning_rate": 2.902344827586207e-05, "loss": 0.2366, "step": 7018 }, { "epoch": 24.20344827586207, "grad_norm": 1.0719032287597656, "learning_rate": 2.9027586206896554e-05, "loss": 0.2413, "step": 7019 }, { "epoch": 24.20689655172414, "grad_norm": 1.1178172826766968, "learning_rate": 2.9031724137931036e-05, "loss": 0.2489, "step": 7020 }, { "epoch": 24.21034482758621, "grad_norm": 1.047223687171936, "learning_rate": 2.9035862068965518e-05, "loss": 0.2243, "step": 7021 }, { "epoch": 24.213793103448275, "grad_norm": 1.02647864818573, "learning_rate": 2.904e-05, "loss": 0.2595, "step": 7022 }, { "epoch": 24.217241379310344, "grad_norm": 1.125938892364502, "learning_rate": 2.9044137931034482e-05, "loss": 0.2225, "step": 7023 }, { "epoch": 24.220689655172414, "grad_norm": 1.462570309638977, "learning_rate": 2.9048275862068967e-05, "loss": 0.2283, "step": 7024 }, { "epoch": 24.224137931034484, "grad_norm": 0.9445935487747192, "learning_rate": 2.905241379310345e-05, "loss": 0.2328, "step": 7025 }, { "epoch": 24.22758620689655, "grad_norm": 1.2581311464309692, "learning_rate": 2.905655172413793e-05, "loss": 0.2219, "step": 7026 }, { "epoch": 24.23103448275862, "grad_norm": 1.3526926040649414, "learning_rate": 2.9060689655172413e-05, "loss": 0.2299, "step": 7027 }, { "epoch": 24.23448275862069, "grad_norm": 0.7447390556335449, "learning_rate": 2.90648275862069e-05, "loss": 0.2688, "step": 7028 }, { "epoch": 24.23793103448276, "grad_norm": 0.9575294256210327, "learning_rate": 2.906896551724138e-05, "loss": 0.2181, "step": 7029 }, { "epoch": 24.24137931034483, "grad_norm": 1.1703892946243286, "learning_rate": 2.9073103448275863e-05, "loss": 0.2494, "step": 7030 }, { "epoch": 24.244827586206895, "grad_norm": 1.1091777086257935, "learning_rate": 2.9077241379310345e-05, "loss": 0.2513, "step": 7031 }, { "epoch": 24.248275862068965, "grad_norm": 1.5696511268615723, "learning_rate": 2.908137931034483e-05, "loss": 0.2456, "step": 7032 }, { "epoch": 24.251724137931035, "grad_norm": 0.8910120725631714, "learning_rate": 2.9085517241379312e-05, "loss": 0.2779, "step": 7033 }, { "epoch": 24.255172413793105, "grad_norm": 1.8726847171783447, "learning_rate": 2.9089655172413794e-05, "loss": 0.2606, "step": 7034 }, { "epoch": 24.25862068965517, "grad_norm": 5.98206901550293, "learning_rate": 2.9093793103448276e-05, "loss": 0.3415, "step": 7035 }, { "epoch": 24.26206896551724, "grad_norm": 0.6657417416572571, "learning_rate": 2.9097931034482758e-05, "loss": 0.3093, "step": 7036 }, { "epoch": 24.26551724137931, "grad_norm": 1.9124789237976074, "learning_rate": 2.9102068965517244e-05, "loss": 0.2474, "step": 7037 }, { "epoch": 24.26896551724138, "grad_norm": 0.6151341795921326, "learning_rate": 2.9106206896551722e-05, "loss": 0.2899, "step": 7038 }, { "epoch": 24.27241379310345, "grad_norm": 1.0074723958969116, "learning_rate": 2.9110344827586208e-05, "loss": 0.243, "step": 7039 }, { "epoch": 24.275862068965516, "grad_norm": 1.6601521968841553, "learning_rate": 2.911448275862069e-05, "loss": 0.2651, "step": 7040 }, { "epoch": 24.279310344827586, "grad_norm": 1.4716756343841553, "learning_rate": 2.9118620689655175e-05, "loss": 0.2677, "step": 7041 }, { "epoch": 24.282758620689656, "grad_norm": 0.5567337870597839, "learning_rate": 2.9122758620689654e-05, "loss": 0.2568, "step": 7042 }, { "epoch": 24.286206896551725, "grad_norm": 2.6645760536193848, "learning_rate": 2.912689655172414e-05, "loss": 0.242, "step": 7043 }, { "epoch": 24.28965517241379, "grad_norm": 1.690125823020935, "learning_rate": 2.913103448275862e-05, "loss": 0.2583, "step": 7044 }, { "epoch": 24.29310344827586, "grad_norm": 0.7650014162063599, "learning_rate": 2.9135172413793107e-05, "loss": 0.2578, "step": 7045 }, { "epoch": 24.29655172413793, "grad_norm": 2.4685070514678955, "learning_rate": 2.9139310344827585e-05, "loss": 0.2477, "step": 7046 }, { "epoch": 24.3, "grad_norm": 0.8190164566040039, "learning_rate": 2.914344827586207e-05, "loss": 0.2691, "step": 7047 }, { "epoch": 24.30344827586207, "grad_norm": 1.981947422027588, "learning_rate": 2.9147586206896553e-05, "loss": 0.2935, "step": 7048 }, { "epoch": 24.306896551724137, "grad_norm": 1.0437500476837158, "learning_rate": 2.9151724137931035e-05, "loss": 0.2255, "step": 7049 }, { "epoch": 24.310344827586206, "grad_norm": 1.0952235460281372, "learning_rate": 2.9155862068965517e-05, "loss": 0.255, "step": 7050 }, { "epoch": 24.313793103448276, "grad_norm": 0.7010920643806458, "learning_rate": 2.916e-05, "loss": 0.2668, "step": 7051 }, { "epoch": 24.317241379310346, "grad_norm": 0.8884496092796326, "learning_rate": 2.9164137931034484e-05, "loss": 0.255, "step": 7052 }, { "epoch": 24.320689655172412, "grad_norm": 1.7324235439300537, "learning_rate": 2.9168275862068966e-05, "loss": 0.2473, "step": 7053 }, { "epoch": 24.324137931034482, "grad_norm": 0.7487353682518005, "learning_rate": 2.9172413793103448e-05, "loss": 0.2601, "step": 7054 }, { "epoch": 24.32758620689655, "grad_norm": 1.2114412784576416, "learning_rate": 2.917655172413793e-05, "loss": 0.2329, "step": 7055 }, { "epoch": 24.33103448275862, "grad_norm": 2.0380005836486816, "learning_rate": 2.9180689655172416e-05, "loss": 0.2268, "step": 7056 }, { "epoch": 24.33448275862069, "grad_norm": 1.2394746541976929, "learning_rate": 2.9184827586206898e-05, "loss": 0.2375, "step": 7057 }, { "epoch": 24.337931034482757, "grad_norm": 1.0279475450515747, "learning_rate": 2.9188965517241383e-05, "loss": 0.2749, "step": 7058 }, { "epoch": 24.341379310344827, "grad_norm": 8.889538764953613, "learning_rate": 2.919310344827586e-05, "loss": 0.2613, "step": 7059 }, { "epoch": 24.344827586206897, "grad_norm": 1.4687786102294922, "learning_rate": 2.9197241379310347e-05, "loss": 0.3072, "step": 7060 }, { "epoch": 24.348275862068967, "grad_norm": 1.7238589525222778, "learning_rate": 2.920137931034483e-05, "loss": 0.3873, "step": 7061 }, { "epoch": 24.351724137931033, "grad_norm": 0.7160221934318542, "learning_rate": 2.920551724137931e-05, "loss": 0.2818, "step": 7062 }, { "epoch": 24.355172413793102, "grad_norm": 0.759817361831665, "learning_rate": 2.9209655172413793e-05, "loss": 0.2912, "step": 7063 }, { "epoch": 24.358620689655172, "grad_norm": 1.0850008726119995, "learning_rate": 2.9213793103448275e-05, "loss": 0.259, "step": 7064 }, { "epoch": 24.362068965517242, "grad_norm": 1.0540640354156494, "learning_rate": 2.921793103448276e-05, "loss": 0.3006, "step": 7065 }, { "epoch": 24.36551724137931, "grad_norm": 0.6499329805374146, "learning_rate": 2.9222068965517242e-05, "loss": 0.2641, "step": 7066 }, { "epoch": 24.368965517241378, "grad_norm": 0.5874789953231812, "learning_rate": 2.9226206896551724e-05, "loss": 0.2525, "step": 7067 }, { "epoch": 24.372413793103448, "grad_norm": 2.300764322280884, "learning_rate": 2.9230344827586206e-05, "loss": 0.2769, "step": 7068 }, { "epoch": 24.375862068965517, "grad_norm": 0.6822474002838135, "learning_rate": 2.9234482758620692e-05, "loss": 0.2393, "step": 7069 }, { "epoch": 24.379310344827587, "grad_norm": 1.7257362604141235, "learning_rate": 2.9238620689655174e-05, "loss": 0.2749, "step": 7070 }, { "epoch": 24.382758620689657, "grad_norm": 0.9216638803482056, "learning_rate": 2.9242758620689656e-05, "loss": 0.2399, "step": 7071 }, { "epoch": 24.386206896551723, "grad_norm": 1.2694916725158691, "learning_rate": 2.9246896551724138e-05, "loss": 0.2403, "step": 7072 }, { "epoch": 24.389655172413793, "grad_norm": 1.117575764656067, "learning_rate": 2.9251034482758623e-05, "loss": 0.2705, "step": 7073 }, { "epoch": 24.393103448275863, "grad_norm": 0.8554812073707581, "learning_rate": 2.9255172413793105e-05, "loss": 0.2668, "step": 7074 }, { "epoch": 24.396551724137932, "grad_norm": 1.5074000358581543, "learning_rate": 2.9259310344827584e-05, "loss": 0.2802, "step": 7075 }, { "epoch": 24.4, "grad_norm": 0.6791738271713257, "learning_rate": 2.926344827586207e-05, "loss": 0.2454, "step": 7076 }, { "epoch": 24.40344827586207, "grad_norm": 0.7933318614959717, "learning_rate": 2.926758620689655e-05, "loss": 0.2478, "step": 7077 }, { "epoch": 24.406896551724138, "grad_norm": 0.8039292693138123, "learning_rate": 2.9271724137931037e-05, "loss": 0.2338, "step": 7078 }, { "epoch": 24.410344827586208, "grad_norm": 1.005847692489624, "learning_rate": 2.9275862068965515e-05, "loss": 0.2739, "step": 7079 }, { "epoch": 24.413793103448278, "grad_norm": 0.9055933952331543, "learning_rate": 2.928e-05, "loss": 0.217, "step": 7080 }, { "epoch": 24.417241379310344, "grad_norm": 1.098015546798706, "learning_rate": 2.9284137931034483e-05, "loss": 0.2264, "step": 7081 }, { "epoch": 24.420689655172414, "grad_norm": 1.1301888227462769, "learning_rate": 2.9288275862068968e-05, "loss": 0.224, "step": 7082 }, { "epoch": 24.424137931034483, "grad_norm": 1.496385931968689, "learning_rate": 2.9292413793103447e-05, "loss": 0.2271, "step": 7083 }, { "epoch": 24.427586206896553, "grad_norm": 0.9335336685180664, "learning_rate": 2.9296551724137932e-05, "loss": 0.2471, "step": 7084 }, { "epoch": 24.43103448275862, "grad_norm": 1.9785269498825073, "learning_rate": 2.9300689655172414e-05, "loss": 0.3672, "step": 7085 }, { "epoch": 24.43448275862069, "grad_norm": 0.8953890204429626, "learning_rate": 2.93048275862069e-05, "loss": 0.3345, "step": 7086 }, { "epoch": 24.43793103448276, "grad_norm": 0.667905330657959, "learning_rate": 2.930896551724138e-05, "loss": 0.2834, "step": 7087 }, { "epoch": 24.44137931034483, "grad_norm": 1.0195116996765137, "learning_rate": 2.931310344827586e-05, "loss": 0.2716, "step": 7088 }, { "epoch": 24.444827586206898, "grad_norm": 0.7586660385131836, "learning_rate": 2.9317241379310346e-05, "loss": 0.2901, "step": 7089 }, { "epoch": 24.448275862068964, "grad_norm": 1.0115728378295898, "learning_rate": 2.9321379310344828e-05, "loss": 0.2389, "step": 7090 }, { "epoch": 24.451724137931034, "grad_norm": 1.262388825416565, "learning_rate": 2.9325517241379313e-05, "loss": 0.275, "step": 7091 }, { "epoch": 24.455172413793104, "grad_norm": 1.8585485219955444, "learning_rate": 2.9329655172413792e-05, "loss": 0.2858, "step": 7092 }, { "epoch": 24.458620689655174, "grad_norm": 0.6217002272605896, "learning_rate": 2.9333793103448277e-05, "loss": 0.252, "step": 7093 }, { "epoch": 24.46206896551724, "grad_norm": 1.2084225416183472, "learning_rate": 2.933793103448276e-05, "loss": 0.2506, "step": 7094 }, { "epoch": 24.46551724137931, "grad_norm": 0.8426377773284912, "learning_rate": 2.9342068965517245e-05, "loss": 0.2456, "step": 7095 }, { "epoch": 24.46896551724138, "grad_norm": 0.7927412390708923, "learning_rate": 2.9346206896551723e-05, "loss": 0.2391, "step": 7096 }, { "epoch": 24.47241379310345, "grad_norm": 1.1369192600250244, "learning_rate": 2.935034482758621e-05, "loss": 0.2542, "step": 7097 }, { "epoch": 24.47586206896552, "grad_norm": 1.172947883605957, "learning_rate": 2.935448275862069e-05, "loss": 0.2411, "step": 7098 }, { "epoch": 24.479310344827585, "grad_norm": 0.8407701253890991, "learning_rate": 2.9358620689655176e-05, "loss": 0.2565, "step": 7099 }, { "epoch": 24.482758620689655, "grad_norm": 0.6822278499603271, "learning_rate": 2.9362758620689655e-05, "loss": 0.2217, "step": 7100 }, { "epoch": 24.486206896551725, "grad_norm": 1.0120173692703247, "learning_rate": 2.9366896551724137e-05, "loss": 0.2509, "step": 7101 }, { "epoch": 24.489655172413794, "grad_norm": 1.073155164718628, "learning_rate": 2.9371034482758622e-05, "loss": 0.223, "step": 7102 }, { "epoch": 24.49310344827586, "grad_norm": 0.7527897953987122, "learning_rate": 2.9375172413793104e-05, "loss": 0.2165, "step": 7103 }, { "epoch": 24.49655172413793, "grad_norm": 0.8036370873451233, "learning_rate": 2.9379310344827586e-05, "loss": 0.2456, "step": 7104 }, { "epoch": 24.5, "grad_norm": 1.008475661277771, "learning_rate": 2.9383448275862068e-05, "loss": 0.2637, "step": 7105 }, { "epoch": 24.50344827586207, "grad_norm": 1.8688976764678955, "learning_rate": 2.9387586206896554e-05, "loss": 0.2805, "step": 7106 }, { "epoch": 24.50689655172414, "grad_norm": 2.749185800552368, "learning_rate": 2.9391724137931036e-05, "loss": 0.2575, "step": 7107 }, { "epoch": 24.510344827586206, "grad_norm": 1.8054733276367188, "learning_rate": 2.9395862068965518e-05, "loss": 0.2569, "step": 7108 }, { "epoch": 24.513793103448275, "grad_norm": 1.9599131345748901, "learning_rate": 2.94e-05, "loss": 0.2929, "step": 7109 }, { "epoch": 24.517241379310345, "grad_norm": 1.1766926050186157, "learning_rate": 2.9404137931034485e-05, "loss": 0.3199, "step": 7110 }, { "epoch": 24.520689655172415, "grad_norm": 0.8858413100242615, "learning_rate": 2.9408275862068967e-05, "loss": 0.3122, "step": 7111 }, { "epoch": 24.52413793103448, "grad_norm": 0.6440045237541199, "learning_rate": 2.941241379310345e-05, "loss": 0.2787, "step": 7112 }, { "epoch": 24.52758620689655, "grad_norm": 0.5840417146682739, "learning_rate": 2.941655172413793e-05, "loss": 0.2839, "step": 7113 }, { "epoch": 24.53103448275862, "grad_norm": 1.125719428062439, "learning_rate": 2.9420689655172413e-05, "loss": 0.2707, "step": 7114 }, { "epoch": 24.53448275862069, "grad_norm": 0.9204256534576416, "learning_rate": 2.94248275862069e-05, "loss": 0.2476, "step": 7115 }, { "epoch": 24.53793103448276, "grad_norm": 0.9983019828796387, "learning_rate": 2.9428965517241377e-05, "loss": 0.2742, "step": 7116 }, { "epoch": 24.541379310344826, "grad_norm": 1.9350552558898926, "learning_rate": 2.9433103448275863e-05, "loss": 0.2564, "step": 7117 }, { "epoch": 24.544827586206896, "grad_norm": 1.1977643966674805, "learning_rate": 2.9437241379310345e-05, "loss": 0.2667, "step": 7118 }, { "epoch": 24.548275862068966, "grad_norm": 1.1947100162506104, "learning_rate": 2.944137931034483e-05, "loss": 0.2439, "step": 7119 }, { "epoch": 24.551724137931036, "grad_norm": 0.7378824949264526, "learning_rate": 2.944551724137931e-05, "loss": 0.2442, "step": 7120 }, { "epoch": 24.555172413793102, "grad_norm": 3.076932668685913, "learning_rate": 2.9449655172413794e-05, "loss": 0.2574, "step": 7121 }, { "epoch": 24.55862068965517, "grad_norm": 0.9910748600959778, "learning_rate": 2.9453793103448276e-05, "loss": 0.252, "step": 7122 }, { "epoch": 24.56206896551724, "grad_norm": 3.9603612422943115, "learning_rate": 2.945793103448276e-05, "loss": 0.2445, "step": 7123 }, { "epoch": 24.56551724137931, "grad_norm": 2.300949811935425, "learning_rate": 2.9462068965517244e-05, "loss": 0.2465, "step": 7124 }, { "epoch": 24.56896551724138, "grad_norm": 1.6633145809173584, "learning_rate": 2.9466206896551726e-05, "loss": 0.2479, "step": 7125 }, { "epoch": 24.572413793103447, "grad_norm": 1.4136322736740112, "learning_rate": 2.9470344827586208e-05, "loss": 0.2369, "step": 7126 }, { "epoch": 24.575862068965517, "grad_norm": 1.299492359161377, "learning_rate": 2.947448275862069e-05, "loss": 0.2508, "step": 7127 }, { "epoch": 24.579310344827586, "grad_norm": 2.391695022583008, "learning_rate": 2.9478620689655175e-05, "loss": 0.2325, "step": 7128 }, { "epoch": 24.582758620689656, "grad_norm": 0.9576685428619385, "learning_rate": 2.9482758620689654e-05, "loss": 0.2535, "step": 7129 }, { "epoch": 24.586206896551722, "grad_norm": 1.2650312185287476, "learning_rate": 2.948689655172414e-05, "loss": 0.2759, "step": 7130 }, { "epoch": 24.589655172413792, "grad_norm": 1.1725447177886963, "learning_rate": 2.949103448275862e-05, "loss": 0.2282, "step": 7131 }, { "epoch": 24.593103448275862, "grad_norm": 1.1858205795288086, "learning_rate": 2.9495172413793106e-05, "loss": 0.2586, "step": 7132 }, { "epoch": 24.59655172413793, "grad_norm": 1.7041094303131104, "learning_rate": 2.9499310344827585e-05, "loss": 0.2164, "step": 7133 }, { "epoch": 24.6, "grad_norm": 1.0097652673721313, "learning_rate": 2.950344827586207e-05, "loss": 0.2707, "step": 7134 }, { "epoch": 24.603448275862068, "grad_norm": 2.595123052597046, "learning_rate": 2.9507586206896553e-05, "loss": 0.3552, "step": 7135 }, { "epoch": 24.606896551724137, "grad_norm": 0.8880007266998291, "learning_rate": 2.9511724137931038e-05, "loss": 0.3197, "step": 7136 }, { "epoch": 24.610344827586207, "grad_norm": 1.3001962900161743, "learning_rate": 2.9515862068965517e-05, "loss": 0.3277, "step": 7137 }, { "epoch": 24.613793103448277, "grad_norm": 0.900956392288208, "learning_rate": 2.9520000000000002e-05, "loss": 0.2937, "step": 7138 }, { "epoch": 24.617241379310343, "grad_norm": 1.486893892288208, "learning_rate": 2.9524137931034484e-05, "loss": 0.265, "step": 7139 }, { "epoch": 24.620689655172413, "grad_norm": 1.1019014120101929, "learning_rate": 2.9528275862068966e-05, "loss": 0.2618, "step": 7140 }, { "epoch": 24.624137931034483, "grad_norm": 0.8022728562355042, "learning_rate": 2.9532413793103448e-05, "loss": 0.2567, "step": 7141 }, { "epoch": 24.627586206896552, "grad_norm": 0.7718653082847595, "learning_rate": 2.953655172413793e-05, "loss": 0.3272, "step": 7142 }, { "epoch": 24.631034482758622, "grad_norm": 0.7641851305961609, "learning_rate": 2.9540689655172415e-05, "loss": 0.2878, "step": 7143 }, { "epoch": 24.63448275862069, "grad_norm": 1.3366975784301758, "learning_rate": 2.9544827586206897e-05, "loss": 0.2601, "step": 7144 }, { "epoch": 24.637931034482758, "grad_norm": 0.7362766265869141, "learning_rate": 2.954896551724138e-05, "loss": 0.2784, "step": 7145 }, { "epoch": 24.641379310344828, "grad_norm": 0.9449194073677063, "learning_rate": 2.955310344827586e-05, "loss": 0.2539, "step": 7146 }, { "epoch": 24.644827586206898, "grad_norm": 0.9306392073631287, "learning_rate": 2.9557241379310347e-05, "loss": 0.2812, "step": 7147 }, { "epoch": 24.648275862068964, "grad_norm": 1.3258377313613892, "learning_rate": 2.956137931034483e-05, "loss": 0.2388, "step": 7148 }, { "epoch": 24.651724137931033, "grad_norm": 0.9750763773918152, "learning_rate": 2.956551724137931e-05, "loss": 0.2759, "step": 7149 }, { "epoch": 24.655172413793103, "grad_norm": 2.279737949371338, "learning_rate": 2.9569655172413793e-05, "loss": 0.2877, "step": 7150 }, { "epoch": 24.658620689655173, "grad_norm": 0.8019377589225769, "learning_rate": 2.957379310344828e-05, "loss": 0.2331, "step": 7151 }, { "epoch": 24.662068965517243, "grad_norm": 0.9472291469573975, "learning_rate": 2.957793103448276e-05, "loss": 0.2939, "step": 7152 }, { "epoch": 24.66551724137931, "grad_norm": 1.2988909482955933, "learning_rate": 2.958206896551724e-05, "loss": 0.2364, "step": 7153 }, { "epoch": 24.66896551724138, "grad_norm": 1.1321946382522583, "learning_rate": 2.9586206896551724e-05, "loss": 0.2417, "step": 7154 }, { "epoch": 24.67241379310345, "grad_norm": 1.2686537504196167, "learning_rate": 2.9590344827586206e-05, "loss": 0.2497, "step": 7155 }, { "epoch": 24.675862068965518, "grad_norm": 0.9020682573318481, "learning_rate": 2.9594482758620692e-05, "loss": 0.2661, "step": 7156 }, { "epoch": 24.679310344827588, "grad_norm": 1.279886245727539, "learning_rate": 2.9598620689655174e-05, "loss": 0.2865, "step": 7157 }, { "epoch": 24.682758620689654, "grad_norm": 1.6706956624984741, "learning_rate": 2.9602758620689656e-05, "loss": 0.2435, "step": 7158 }, { "epoch": 24.686206896551724, "grad_norm": 1.3187906742095947, "learning_rate": 2.9606896551724138e-05, "loss": 0.2765, "step": 7159 }, { "epoch": 24.689655172413794, "grad_norm": 2.688133478164673, "learning_rate": 2.9611034482758623e-05, "loss": 0.3772, "step": 7160 }, { "epoch": 24.693103448275863, "grad_norm": 0.8119664788246155, "learning_rate": 2.9615172413793105e-05, "loss": 0.2902, "step": 7161 }, { "epoch": 24.69655172413793, "grad_norm": 0.7466662526130676, "learning_rate": 2.9619310344827587e-05, "loss": 0.2869, "step": 7162 }, { "epoch": 24.7, "grad_norm": 0.8148683905601501, "learning_rate": 2.962344827586207e-05, "loss": 0.2635, "step": 7163 }, { "epoch": 24.70344827586207, "grad_norm": 0.6682412028312683, "learning_rate": 2.962758620689655e-05, "loss": 0.2695, "step": 7164 }, { "epoch": 24.70689655172414, "grad_norm": 1.0977991819381714, "learning_rate": 2.9631724137931037e-05, "loss": 0.2765, "step": 7165 }, { "epoch": 24.71034482758621, "grad_norm": 0.6958579421043396, "learning_rate": 2.9635862068965515e-05, "loss": 0.2487, "step": 7166 }, { "epoch": 24.713793103448275, "grad_norm": 0.8004865646362305, "learning_rate": 2.964e-05, "loss": 0.2664, "step": 7167 }, { "epoch": 24.717241379310344, "grad_norm": 1.040416955947876, "learning_rate": 2.9644137931034483e-05, "loss": 0.2388, "step": 7168 }, { "epoch": 24.720689655172414, "grad_norm": 0.9621669054031372, "learning_rate": 2.9648275862068968e-05, "loss": 0.2646, "step": 7169 }, { "epoch": 24.724137931034484, "grad_norm": 1.029356598854065, "learning_rate": 2.9652413793103447e-05, "loss": 0.2332, "step": 7170 }, { "epoch": 24.72758620689655, "grad_norm": 1.3321059942245483, "learning_rate": 2.9656551724137932e-05, "loss": 0.2396, "step": 7171 }, { "epoch": 24.73103448275862, "grad_norm": 0.9660395383834839, "learning_rate": 2.9660689655172414e-05, "loss": 0.2361, "step": 7172 }, { "epoch": 24.73448275862069, "grad_norm": 0.894507110118866, "learning_rate": 2.96648275862069e-05, "loss": 0.2265, "step": 7173 }, { "epoch": 24.73793103448276, "grad_norm": 1.328138828277588, "learning_rate": 2.966896551724138e-05, "loss": 0.2506, "step": 7174 }, { "epoch": 24.74137931034483, "grad_norm": 1.1490917205810547, "learning_rate": 2.9673103448275864e-05, "loss": 0.2482, "step": 7175 }, { "epoch": 24.744827586206895, "grad_norm": 1.057145118713379, "learning_rate": 2.9677241379310346e-05, "loss": 0.2357, "step": 7176 }, { "epoch": 24.748275862068965, "grad_norm": 0.9443281888961792, "learning_rate": 2.9681379310344828e-05, "loss": 0.2296, "step": 7177 }, { "epoch": 24.751724137931035, "grad_norm": 0.9018959403038025, "learning_rate": 2.968551724137931e-05, "loss": 0.2231, "step": 7178 }, { "epoch": 24.755172413793105, "grad_norm": 0.7795891165733337, "learning_rate": 2.9689655172413792e-05, "loss": 0.2285, "step": 7179 }, { "epoch": 24.75862068965517, "grad_norm": 1.1029695272445679, "learning_rate": 2.9693793103448277e-05, "loss": 0.2476, "step": 7180 }, { "epoch": 24.76206896551724, "grad_norm": 0.9544664621353149, "learning_rate": 2.969793103448276e-05, "loss": 0.2447, "step": 7181 }, { "epoch": 24.76551724137931, "grad_norm": 0.858982264995575, "learning_rate": 2.970206896551724e-05, "loss": 0.2252, "step": 7182 }, { "epoch": 24.76896551724138, "grad_norm": 1.406503438949585, "learning_rate": 2.9706206896551723e-05, "loss": 0.2556, "step": 7183 }, { "epoch": 24.77241379310345, "grad_norm": 1.3761197328567505, "learning_rate": 2.971034482758621e-05, "loss": 0.2572, "step": 7184 }, { "epoch": 24.775862068965516, "grad_norm": 1.3320354223251343, "learning_rate": 2.971448275862069e-05, "loss": 0.341, "step": 7185 }, { "epoch": 24.779310344827586, "grad_norm": 0.9473810195922852, "learning_rate": 2.9718620689655173e-05, "loss": 0.315, "step": 7186 }, { "epoch": 24.782758620689656, "grad_norm": 0.9228931665420532, "learning_rate": 2.9722758620689655e-05, "loss": 0.2669, "step": 7187 }, { "epoch": 24.786206896551725, "grad_norm": 0.8351612091064453, "learning_rate": 2.972689655172414e-05, "loss": 0.2683, "step": 7188 }, { "epoch": 24.78965517241379, "grad_norm": 0.5733228921890259, "learning_rate": 2.9731034482758622e-05, "loss": 0.2946, "step": 7189 }, { "epoch": 24.79310344827586, "grad_norm": 0.7848276495933533, "learning_rate": 2.9735172413793104e-05, "loss": 0.2538, "step": 7190 }, { "epoch": 24.79655172413793, "grad_norm": 0.7687768340110779, "learning_rate": 2.9739310344827586e-05, "loss": 0.271, "step": 7191 }, { "epoch": 24.8, "grad_norm": 0.5447540283203125, "learning_rate": 2.9743448275862068e-05, "loss": 0.2407, "step": 7192 }, { "epoch": 24.80344827586207, "grad_norm": 1.164650321006775, "learning_rate": 2.9747586206896554e-05, "loss": 0.269, "step": 7193 }, { "epoch": 24.806896551724137, "grad_norm": 0.64692622423172, "learning_rate": 2.9751724137931036e-05, "loss": 0.2401, "step": 7194 }, { "epoch": 24.810344827586206, "grad_norm": 1.39447820186615, "learning_rate": 2.9755862068965518e-05, "loss": 0.2479, "step": 7195 }, { "epoch": 24.813793103448276, "grad_norm": 0.6484533548355103, "learning_rate": 2.976e-05, "loss": 0.2687, "step": 7196 }, { "epoch": 24.817241379310346, "grad_norm": 1.053998351097107, "learning_rate": 2.9764137931034485e-05, "loss": 0.2876, "step": 7197 }, { "epoch": 24.820689655172412, "grad_norm": 0.8210556507110596, "learning_rate": 2.9768275862068967e-05, "loss": 0.2416, "step": 7198 }, { "epoch": 24.824137931034482, "grad_norm": 0.5512459874153137, "learning_rate": 2.977241379310345e-05, "loss": 0.2287, "step": 7199 }, { "epoch": 24.82758620689655, "grad_norm": 0.8047906160354614, "learning_rate": 2.977655172413793e-05, "loss": 0.2712, "step": 7200 }, { "epoch": 24.83103448275862, "grad_norm": 0.6133447289466858, "learning_rate": 2.9780689655172417e-05, "loss": 0.2206, "step": 7201 }, { "epoch": 24.83448275862069, "grad_norm": 0.7206403017044067, "learning_rate": 2.97848275862069e-05, "loss": 0.2683, "step": 7202 }, { "epoch": 24.837931034482757, "grad_norm": 0.8519675135612488, "learning_rate": 2.9788965517241377e-05, "loss": 0.2761, "step": 7203 }, { "epoch": 24.841379310344827, "grad_norm": 1.0649924278259277, "learning_rate": 2.9793103448275863e-05, "loss": 0.2481, "step": 7204 }, { "epoch": 24.844827586206897, "grad_norm": 0.7787613868713379, "learning_rate": 2.9797241379310345e-05, "loss": 0.2274, "step": 7205 }, { "epoch": 24.848275862068967, "grad_norm": 1.7419826984405518, "learning_rate": 2.980137931034483e-05, "loss": 0.2373, "step": 7206 }, { "epoch": 24.851724137931036, "grad_norm": 2.415173053741455, "learning_rate": 2.980551724137931e-05, "loss": 0.2378, "step": 7207 }, { "epoch": 24.855172413793102, "grad_norm": 1.104344367980957, "learning_rate": 2.9809655172413794e-05, "loss": 0.2309, "step": 7208 }, { "epoch": 24.858620689655172, "grad_norm": 1.0111637115478516, "learning_rate": 2.9813793103448276e-05, "loss": 0.3005, "step": 7209 }, { "epoch": 24.862068965517242, "grad_norm": 1.699483036994934, "learning_rate": 2.981793103448276e-05, "loss": 0.3413, "step": 7210 }, { "epoch": 24.86551724137931, "grad_norm": 0.6629437208175659, "learning_rate": 2.982206896551724e-05, "loss": 0.3049, "step": 7211 }, { "epoch": 24.868965517241378, "grad_norm": 0.6488236784934998, "learning_rate": 2.9826206896551726e-05, "loss": 0.2815, "step": 7212 }, { "epoch": 24.872413793103448, "grad_norm": 1.2967160940170288, "learning_rate": 2.9830344827586208e-05, "loss": 0.2636, "step": 7213 }, { "epoch": 24.875862068965517, "grad_norm": 0.7017049789428711, "learning_rate": 2.9834482758620693e-05, "loss": 0.2578, "step": 7214 }, { "epoch": 24.879310344827587, "grad_norm": 1.204703688621521, "learning_rate": 2.983862068965517e-05, "loss": 0.252, "step": 7215 }, { "epoch": 24.882758620689657, "grad_norm": 0.9129435420036316, "learning_rate": 2.9842758620689654e-05, "loss": 0.228, "step": 7216 }, { "epoch": 24.886206896551723, "grad_norm": 0.8647228479385376, "learning_rate": 2.984689655172414e-05, "loss": 0.2943, "step": 7217 }, { "epoch": 24.889655172413793, "grad_norm": 0.5765058994293213, "learning_rate": 2.985103448275862e-05, "loss": 0.2659, "step": 7218 }, { "epoch": 24.893103448275863, "grad_norm": 0.9614980816841125, "learning_rate": 2.9855172413793103e-05, "loss": 0.2587, "step": 7219 }, { "epoch": 24.896551724137932, "grad_norm": 0.6066480875015259, "learning_rate": 2.9859310344827585e-05, "loss": 0.2583, "step": 7220 }, { "epoch": 24.9, "grad_norm": 0.7053432464599609, "learning_rate": 2.986344827586207e-05, "loss": 0.2704, "step": 7221 }, { "epoch": 24.90344827586207, "grad_norm": 1.0049151182174683, "learning_rate": 2.9867586206896552e-05, "loss": 0.2619, "step": 7222 }, { "epoch": 24.906896551724138, "grad_norm": 0.8401839137077332, "learning_rate": 2.9871724137931038e-05, "loss": 0.2617, "step": 7223 }, { "epoch": 24.910344827586208, "grad_norm": 0.6035805940628052, "learning_rate": 2.9875862068965517e-05, "loss": 0.2583, "step": 7224 }, { "epoch": 24.913793103448278, "grad_norm": 0.7523833513259888, "learning_rate": 2.9880000000000002e-05, "loss": 0.2475, "step": 7225 }, { "epoch": 24.917241379310344, "grad_norm": 1.2734519243240356, "learning_rate": 2.9884137931034484e-05, "loss": 0.2438, "step": 7226 }, { "epoch": 24.920689655172414, "grad_norm": 1.1530622243881226, "learning_rate": 2.988827586206897e-05, "loss": 0.2447, "step": 7227 }, { "epoch": 24.924137931034483, "grad_norm": 0.7108520865440369, "learning_rate": 2.9892413793103448e-05, "loss": 0.233, "step": 7228 }, { "epoch": 24.927586206896553, "grad_norm": 1.266501784324646, "learning_rate": 2.989655172413793e-05, "loss": 0.2537, "step": 7229 }, { "epoch": 24.93103448275862, "grad_norm": 3.3397512435913086, "learning_rate": 2.9900689655172415e-05, "loss": 0.2544, "step": 7230 }, { "epoch": 24.93448275862069, "grad_norm": 0.8628444075584412, "learning_rate": 2.9904827586206897e-05, "loss": 0.26, "step": 7231 }, { "epoch": 24.93793103448276, "grad_norm": 0.9980718493461609, "learning_rate": 2.990896551724138e-05, "loss": 0.2504, "step": 7232 }, { "epoch": 24.94137931034483, "grad_norm": 5.312636852264404, "learning_rate": 2.991310344827586e-05, "loss": 0.2321, "step": 7233 }, { "epoch": 24.944827586206898, "grad_norm": 3.7628397941589355, "learning_rate": 2.9917241379310347e-05, "loss": 0.2673, "step": 7234 }, { "epoch": 24.948275862068964, "grad_norm": 1.8401938676834106, "learning_rate": 2.992137931034483e-05, "loss": 0.3678, "step": 7235 }, { "epoch": 24.951724137931034, "grad_norm": 0.9477660655975342, "learning_rate": 2.992551724137931e-05, "loss": 0.31, "step": 7236 }, { "epoch": 24.955172413793104, "grad_norm": 1.1042778491973877, "learning_rate": 2.9929655172413793e-05, "loss": 0.2807, "step": 7237 }, { "epoch": 24.958620689655174, "grad_norm": 0.6888169050216675, "learning_rate": 2.993379310344828e-05, "loss": 0.256, "step": 7238 }, { "epoch": 24.96206896551724, "grad_norm": 0.7491300106048584, "learning_rate": 2.993793103448276e-05, "loss": 0.263, "step": 7239 }, { "epoch": 24.96551724137931, "grad_norm": 0.7856480479240417, "learning_rate": 2.9942068965517242e-05, "loss": 0.2542, "step": 7240 }, { "epoch": 24.96896551724138, "grad_norm": 0.5414701700210571, "learning_rate": 2.9946206896551724e-05, "loss": 0.2043, "step": 7241 }, { "epoch": 24.97241379310345, "grad_norm": 0.8103362321853638, "learning_rate": 2.9950344827586206e-05, "loss": 0.2313, "step": 7242 }, { "epoch": 24.97586206896552, "grad_norm": 0.8693047761917114, "learning_rate": 2.9954482758620692e-05, "loss": 0.2599, "step": 7243 }, { "epoch": 24.979310344827585, "grad_norm": 1.4014170169830322, "learning_rate": 2.995862068965517e-05, "loss": 0.251, "step": 7244 }, { "epoch": 24.982758620689655, "grad_norm": 1.2255706787109375, "learning_rate": 2.9962758620689656e-05, "loss": 0.2327, "step": 7245 }, { "epoch": 24.986206896551725, "grad_norm": 0.8193880319595337, "learning_rate": 2.9966896551724138e-05, "loss": 0.2456, "step": 7246 }, { "epoch": 24.989655172413794, "grad_norm": 0.8858811855316162, "learning_rate": 2.9971034482758623e-05, "loss": 0.2496, "step": 7247 }, { "epoch": 24.99310344827586, "grad_norm": 0.7334471344947815, "learning_rate": 2.9975172413793102e-05, "loss": 0.21, "step": 7248 }, { "epoch": 24.99655172413793, "grad_norm": 1.4590460062026978, "learning_rate": 2.9979310344827587e-05, "loss": 0.2493, "step": 7249 }, { "epoch": 25.0, "grad_norm": 1.4547908306121826, "learning_rate": 2.998344827586207e-05, "loss": 0.3582, "step": 7250 }, { "epoch": 25.00344827586207, "grad_norm": 1.8759151697158813, "learning_rate": 2.9987586206896555e-05, "loss": 0.2892, "step": 7251 }, { "epoch": 25.00689655172414, "grad_norm": 0.9833908677101135, "learning_rate": 2.9991724137931033e-05, "loss": 0.3002, "step": 7252 }, { "epoch": 25.010344827586206, "grad_norm": 1.1115492582321167, "learning_rate": 2.999586206896552e-05, "loss": 0.2611, "step": 7253 }, { "epoch": 25.013793103448275, "grad_norm": 1.0752180814743042, "learning_rate": 3e-05, "loss": 0.2773, "step": 7254 }, { "epoch": 25.017241379310345, "grad_norm": 0.6647335886955261, "learning_rate": 2.9999540229885056e-05, "loss": 0.2419, "step": 7255 }, { "epoch": 25.020689655172415, "grad_norm": 0.9575107097625732, "learning_rate": 2.9999080459770115e-05, "loss": 0.2442, "step": 7256 }, { "epoch": 25.02413793103448, "grad_norm": 1.1963382959365845, "learning_rate": 2.9998620689655173e-05, "loss": 0.2729, "step": 7257 }, { "epoch": 25.02758620689655, "grad_norm": 0.9183233976364136, "learning_rate": 2.9998160919540232e-05, "loss": 0.2458, "step": 7258 }, { "epoch": 25.03103448275862, "grad_norm": 2.265136241912842, "learning_rate": 2.9997701149425287e-05, "loss": 0.2353, "step": 7259 }, { "epoch": 25.03448275862069, "grad_norm": 0.6918526887893677, "learning_rate": 2.9997241379310343e-05, "loss": 0.2324, "step": 7260 }, { "epoch": 25.03793103448276, "grad_norm": 0.8791311979293823, "learning_rate": 2.9996781609195405e-05, "loss": 0.2501, "step": 7261 }, { "epoch": 25.041379310344826, "grad_norm": 0.8411352634429932, "learning_rate": 2.999632183908046e-05, "loss": 0.2648, "step": 7262 }, { "epoch": 25.044827586206896, "grad_norm": 0.769099235534668, "learning_rate": 2.999586206896552e-05, "loss": 0.2335, "step": 7263 }, { "epoch": 25.048275862068966, "grad_norm": 0.8601100444793701, "learning_rate": 2.9995402298850574e-05, "loss": 0.2136, "step": 7264 }, { "epoch": 25.051724137931036, "grad_norm": 1.4963493347167969, "learning_rate": 2.9994942528735633e-05, "loss": 0.2461, "step": 7265 }, { "epoch": 25.055172413793102, "grad_norm": 0.7470319867134094, "learning_rate": 2.999448275862069e-05, "loss": 0.2496, "step": 7266 }, { "epoch": 25.05862068965517, "grad_norm": 0.8754043579101562, "learning_rate": 2.9994022988505747e-05, "loss": 0.2573, "step": 7267 }, { "epoch": 25.06206896551724, "grad_norm": 0.8753653764724731, "learning_rate": 2.9993563218390805e-05, "loss": 0.2535, "step": 7268 }, { "epoch": 25.06551724137931, "grad_norm": 0.8766450881958008, "learning_rate": 2.9993103448275864e-05, "loss": 0.2415, "step": 7269 }, { "epoch": 25.06896551724138, "grad_norm": 1.535722255706787, "learning_rate": 2.999264367816092e-05, "loss": 0.2345, "step": 7270 }, { "epoch": 25.072413793103447, "grad_norm": 1.27076256275177, "learning_rate": 2.9992183908045978e-05, "loss": 0.2436, "step": 7271 }, { "epoch": 25.075862068965517, "grad_norm": 1.1007431745529175, "learning_rate": 2.9991724137931033e-05, "loss": 0.2285, "step": 7272 }, { "epoch": 25.079310344827586, "grad_norm": 1.354030728340149, "learning_rate": 2.9991264367816092e-05, "loss": 0.2275, "step": 7273 }, { "epoch": 25.082758620689656, "grad_norm": 1.1395246982574463, "learning_rate": 2.999080459770115e-05, "loss": 0.3281, "step": 7274 }, { "epoch": 25.086206896551722, "grad_norm": 2.356135606765747, "learning_rate": 2.9990344827586206e-05, "loss": 0.381, "step": 7275 }, { "epoch": 25.089655172413792, "grad_norm": 0.6784765720367432, "learning_rate": 2.9989885057471265e-05, "loss": 0.3139, "step": 7276 }, { "epoch": 25.093103448275862, "grad_norm": 0.6462969779968262, "learning_rate": 2.9989425287356323e-05, "loss": 0.2884, "step": 7277 }, { "epoch": 25.09655172413793, "grad_norm": 0.9600255489349365, "learning_rate": 2.998896551724138e-05, "loss": 0.2622, "step": 7278 }, { "epoch": 25.1, "grad_norm": 0.716878354549408, "learning_rate": 2.9988505747126437e-05, "loss": 0.2563, "step": 7279 }, { "epoch": 25.103448275862068, "grad_norm": 1.0194674730300903, "learning_rate": 2.9988045977011493e-05, "loss": 0.2664, "step": 7280 }, { "epoch": 25.106896551724137, "grad_norm": 0.8382388353347778, "learning_rate": 2.9987586206896555e-05, "loss": 0.2466, "step": 7281 }, { "epoch": 25.110344827586207, "grad_norm": 1.129486083984375, "learning_rate": 2.998712643678161e-05, "loss": 0.2592, "step": 7282 }, { "epoch": 25.113793103448277, "grad_norm": 0.6710191965103149, "learning_rate": 2.9986666666666665e-05, "loss": 0.2666, "step": 7283 }, { "epoch": 25.117241379310343, "grad_norm": 0.7655836343765259, "learning_rate": 2.9986206896551724e-05, "loss": 0.2077, "step": 7284 }, { "epoch": 25.120689655172413, "grad_norm": 1.8400827646255493, "learning_rate": 2.9985747126436783e-05, "loss": 0.2397, "step": 7285 }, { "epoch": 25.124137931034483, "grad_norm": 0.8192232251167297, "learning_rate": 2.998528735632184e-05, "loss": 0.2453, "step": 7286 }, { "epoch": 25.127586206896552, "grad_norm": 0.6650001406669617, "learning_rate": 2.9984827586206897e-05, "loss": 0.2341, "step": 7287 }, { "epoch": 25.131034482758622, "grad_norm": 1.095253586769104, "learning_rate": 2.9984367816091952e-05, "loss": 0.2174, "step": 7288 }, { "epoch": 25.13448275862069, "grad_norm": 0.8406015634536743, "learning_rate": 2.9983908045977014e-05, "loss": 0.2435, "step": 7289 }, { "epoch": 25.137931034482758, "grad_norm": 0.6653743982315063, "learning_rate": 2.998344827586207e-05, "loss": 0.2629, "step": 7290 }, { "epoch": 25.141379310344828, "grad_norm": 0.964615523815155, "learning_rate": 2.9982988505747128e-05, "loss": 0.2205, "step": 7291 }, { "epoch": 25.144827586206898, "grad_norm": 1.5713603496551514, "learning_rate": 2.9982528735632183e-05, "loss": 0.2468, "step": 7292 }, { "epoch": 25.148275862068967, "grad_norm": 0.9926580786705017, "learning_rate": 2.9982068965517242e-05, "loss": 0.2247, "step": 7293 }, { "epoch": 25.151724137931033, "grad_norm": 1.0646159648895264, "learning_rate": 2.99816091954023e-05, "loss": 0.2779, "step": 7294 }, { "epoch": 25.155172413793103, "grad_norm": 0.7112189531326294, "learning_rate": 2.9981149425287356e-05, "loss": 0.2525, "step": 7295 }, { "epoch": 25.158620689655173, "grad_norm": 1.0443063974380493, "learning_rate": 2.9980689655172415e-05, "loss": 0.2365, "step": 7296 }, { "epoch": 25.162068965517243, "grad_norm": 1.3499001264572144, "learning_rate": 2.9980229885057473e-05, "loss": 0.2315, "step": 7297 }, { "epoch": 25.16551724137931, "grad_norm": 1.385217547416687, "learning_rate": 2.997977011494253e-05, "loss": 0.2736, "step": 7298 }, { "epoch": 25.16896551724138, "grad_norm": 0.992094874382019, "learning_rate": 2.9979310344827587e-05, "loss": 0.2366, "step": 7299 }, { "epoch": 25.17241379310345, "grad_norm": 1.9335036277770996, "learning_rate": 2.9978850574712643e-05, "loss": 0.3953, "step": 7300 }, { "epoch": 25.175862068965518, "grad_norm": 0.9461590051651001, "learning_rate": 2.99783908045977e-05, "loss": 0.3246, "step": 7301 }, { "epoch": 25.179310344827588, "grad_norm": 1.3486109972000122, "learning_rate": 2.997793103448276e-05, "loss": 0.2424, "step": 7302 }, { "epoch": 25.182758620689654, "grad_norm": 0.7649715542793274, "learning_rate": 2.9977471264367815e-05, "loss": 0.28, "step": 7303 }, { "epoch": 25.186206896551724, "grad_norm": 0.5709327459335327, "learning_rate": 2.9977011494252874e-05, "loss": 0.2479, "step": 7304 }, { "epoch": 25.189655172413794, "grad_norm": 0.9794386029243469, "learning_rate": 2.9976551724137933e-05, "loss": 0.2518, "step": 7305 }, { "epoch": 25.193103448275863, "grad_norm": 0.6761652827262878, "learning_rate": 2.9976091954022988e-05, "loss": 0.2433, "step": 7306 }, { "epoch": 25.19655172413793, "grad_norm": 0.7570474147796631, "learning_rate": 2.9975632183908047e-05, "loss": 0.24, "step": 7307 }, { "epoch": 25.2, "grad_norm": 0.8445051312446594, "learning_rate": 2.9975172413793102e-05, "loss": 0.2525, "step": 7308 }, { "epoch": 25.20344827586207, "grad_norm": 0.5909362435340881, "learning_rate": 2.9974712643678164e-05, "loss": 0.2235, "step": 7309 }, { "epoch": 25.20689655172414, "grad_norm": 0.7471025586128235, "learning_rate": 2.997425287356322e-05, "loss": 0.2442, "step": 7310 }, { "epoch": 25.21034482758621, "grad_norm": 0.5308144092559814, "learning_rate": 2.9973793103448275e-05, "loss": 0.2431, "step": 7311 }, { "epoch": 25.213793103448275, "grad_norm": 0.5430887341499329, "learning_rate": 2.9973333333333333e-05, "loss": 0.2237, "step": 7312 }, { "epoch": 25.217241379310344, "grad_norm": 0.6102408766746521, "learning_rate": 2.9972873563218392e-05, "loss": 0.2345, "step": 7313 }, { "epoch": 25.220689655172414, "grad_norm": 0.9344853162765503, "learning_rate": 2.997241379310345e-05, "loss": 0.2438, "step": 7314 }, { "epoch": 25.224137931034484, "grad_norm": 1.1301089525222778, "learning_rate": 2.9971954022988506e-05, "loss": 0.224, "step": 7315 }, { "epoch": 25.22758620689655, "grad_norm": 0.6032621264457703, "learning_rate": 2.997149425287356e-05, "loss": 0.2314, "step": 7316 }, { "epoch": 25.23103448275862, "grad_norm": 0.8269198536872864, "learning_rate": 2.9971034482758623e-05, "loss": 0.2451, "step": 7317 }, { "epoch": 25.23448275862069, "grad_norm": 0.9675132632255554, "learning_rate": 2.997057471264368e-05, "loss": 0.2138, "step": 7318 }, { "epoch": 25.23793103448276, "grad_norm": 0.8238884210586548, "learning_rate": 2.9970114942528737e-05, "loss": 0.2288, "step": 7319 }, { "epoch": 25.24137931034483, "grad_norm": 0.5858955383300781, "learning_rate": 2.9969655172413793e-05, "loss": 0.2017, "step": 7320 }, { "epoch": 25.244827586206895, "grad_norm": 0.7726140022277832, "learning_rate": 2.996919540229885e-05, "loss": 0.2252, "step": 7321 }, { "epoch": 25.248275862068965, "grad_norm": 1.8972892761230469, "learning_rate": 2.996873563218391e-05, "loss": 0.2272, "step": 7322 }, { "epoch": 25.251724137931035, "grad_norm": 1.3998160362243652, "learning_rate": 2.9968275862068965e-05, "loss": 0.2395, "step": 7323 }, { "epoch": 25.255172413793105, "grad_norm": 1.276594877243042, "learning_rate": 2.9967816091954024e-05, "loss": 0.2766, "step": 7324 }, { "epoch": 25.25862068965517, "grad_norm": 1.8753927946090698, "learning_rate": 2.9967356321839083e-05, "loss": 0.3377, "step": 7325 }, { "epoch": 25.26206896551724, "grad_norm": 1.1396687030792236, "learning_rate": 2.9966896551724138e-05, "loss": 0.3208, "step": 7326 }, { "epoch": 25.26551724137931, "grad_norm": 0.9599342346191406, "learning_rate": 2.9966436781609197e-05, "loss": 0.2623, "step": 7327 }, { "epoch": 25.26896551724138, "grad_norm": 1.3141995668411255, "learning_rate": 2.9965977011494252e-05, "loss": 0.2866, "step": 7328 }, { "epoch": 25.27241379310345, "grad_norm": 1.464329719543457, "learning_rate": 2.996551724137931e-05, "loss": 0.289, "step": 7329 }, { "epoch": 25.275862068965516, "grad_norm": 0.8786224126815796, "learning_rate": 2.996505747126437e-05, "loss": 0.2316, "step": 7330 }, { "epoch": 25.279310344827586, "grad_norm": 1.9240955114364624, "learning_rate": 2.9964597701149424e-05, "loss": 0.2492, "step": 7331 }, { "epoch": 25.282758620689656, "grad_norm": 0.924940824508667, "learning_rate": 2.9964137931034483e-05, "loss": 0.2692, "step": 7332 }, { "epoch": 25.286206896551725, "grad_norm": 1.2956568002700806, "learning_rate": 2.9963678160919542e-05, "loss": 0.2866, "step": 7333 }, { "epoch": 25.28965517241379, "grad_norm": 0.6708676815032959, "learning_rate": 2.9963218390804597e-05, "loss": 0.2427, "step": 7334 }, { "epoch": 25.29310344827586, "grad_norm": 1.1049549579620361, "learning_rate": 2.9962758620689656e-05, "loss": 0.2428, "step": 7335 }, { "epoch": 25.29655172413793, "grad_norm": 0.8548804521560669, "learning_rate": 2.996229885057471e-05, "loss": 0.2433, "step": 7336 }, { "epoch": 25.3, "grad_norm": 0.760378360748291, "learning_rate": 2.9961839080459773e-05, "loss": 0.2695, "step": 7337 }, { "epoch": 25.30344827586207, "grad_norm": 1.8489983081817627, "learning_rate": 2.996137931034483e-05, "loss": 0.269, "step": 7338 }, { "epoch": 25.306896551724137, "grad_norm": 1.3712592124938965, "learning_rate": 2.9960919540229884e-05, "loss": 0.219, "step": 7339 }, { "epoch": 25.310344827586206, "grad_norm": 0.6255059838294983, "learning_rate": 2.9960459770114942e-05, "loss": 0.2503, "step": 7340 }, { "epoch": 25.313793103448276, "grad_norm": 0.5750928521156311, "learning_rate": 2.996e-05, "loss": 0.2331, "step": 7341 }, { "epoch": 25.317241379310346, "grad_norm": 0.6762431263923645, "learning_rate": 2.995954022988506e-05, "loss": 0.2199, "step": 7342 }, { "epoch": 25.320689655172412, "grad_norm": 2.887690782546997, "learning_rate": 2.9959080459770115e-05, "loss": 0.25, "step": 7343 }, { "epoch": 25.324137931034482, "grad_norm": 1.2995054721832275, "learning_rate": 2.995862068965517e-05, "loss": 0.2176, "step": 7344 }, { "epoch": 25.32758620689655, "grad_norm": 1.0825574398040771, "learning_rate": 2.9958160919540232e-05, "loss": 0.2396, "step": 7345 }, { "epoch": 25.33103448275862, "grad_norm": 0.7625570297241211, "learning_rate": 2.9957701149425288e-05, "loss": 0.2276, "step": 7346 }, { "epoch": 25.33448275862069, "grad_norm": 1.4446388483047485, "learning_rate": 2.9957241379310346e-05, "loss": 0.2302, "step": 7347 }, { "epoch": 25.337931034482757, "grad_norm": 0.9443039298057556, "learning_rate": 2.9956781609195402e-05, "loss": 0.2282, "step": 7348 }, { "epoch": 25.341379310344827, "grad_norm": 1.7221077680587769, "learning_rate": 2.995632183908046e-05, "loss": 0.2562, "step": 7349 }, { "epoch": 25.344827586206897, "grad_norm": 1.6686899662017822, "learning_rate": 2.995586206896552e-05, "loss": 0.3578, "step": 7350 }, { "epoch": 25.348275862068967, "grad_norm": 1.0253424644470215, "learning_rate": 2.9955402298850574e-05, "loss": 0.2912, "step": 7351 }, { "epoch": 25.351724137931033, "grad_norm": 0.8374273180961609, "learning_rate": 2.9954942528735633e-05, "loss": 0.2618, "step": 7352 }, { "epoch": 25.355172413793102, "grad_norm": 0.8036214113235474, "learning_rate": 2.9954482758620692e-05, "loss": 0.2642, "step": 7353 }, { "epoch": 25.358620689655172, "grad_norm": 0.49418336153030396, "learning_rate": 2.9954022988505747e-05, "loss": 0.267, "step": 7354 }, { "epoch": 25.362068965517242, "grad_norm": 0.7042911648750305, "learning_rate": 2.9953563218390806e-05, "loss": 0.2574, "step": 7355 }, { "epoch": 25.36551724137931, "grad_norm": 0.5549375414848328, "learning_rate": 2.995310344827586e-05, "loss": 0.2541, "step": 7356 }, { "epoch": 25.368965517241378, "grad_norm": 0.8920655846595764, "learning_rate": 2.995264367816092e-05, "loss": 0.2612, "step": 7357 }, { "epoch": 25.372413793103448, "grad_norm": 0.737216591835022, "learning_rate": 2.995218390804598e-05, "loss": 0.2491, "step": 7358 }, { "epoch": 25.375862068965517, "grad_norm": 0.7591504454612732, "learning_rate": 2.9951724137931034e-05, "loss": 0.2455, "step": 7359 }, { "epoch": 25.379310344827587, "grad_norm": 0.928723156452179, "learning_rate": 2.9951264367816092e-05, "loss": 0.2642, "step": 7360 }, { "epoch": 25.382758620689657, "grad_norm": 0.5850176215171814, "learning_rate": 2.995080459770115e-05, "loss": 0.2313, "step": 7361 }, { "epoch": 25.386206896551723, "grad_norm": 0.6532620787620544, "learning_rate": 2.9950344827586206e-05, "loss": 0.265, "step": 7362 }, { "epoch": 25.389655172413793, "grad_norm": 0.7779357433319092, "learning_rate": 2.9949885057471265e-05, "loss": 0.2257, "step": 7363 }, { "epoch": 25.393103448275863, "grad_norm": 0.8072213530540466, "learning_rate": 2.994942528735632e-05, "loss": 0.254, "step": 7364 }, { "epoch": 25.396551724137932, "grad_norm": 0.8151782155036926, "learning_rate": 2.9948965517241382e-05, "loss": 0.2285, "step": 7365 }, { "epoch": 25.4, "grad_norm": 0.6476213335990906, "learning_rate": 2.9948505747126438e-05, "loss": 0.2341, "step": 7366 }, { "epoch": 25.40344827586207, "grad_norm": 0.9326794147491455, "learning_rate": 2.9948045977011493e-05, "loss": 0.2209, "step": 7367 }, { "epoch": 25.406896551724138, "grad_norm": 1.271782636642456, "learning_rate": 2.9947586206896552e-05, "loss": 0.2298, "step": 7368 }, { "epoch": 25.410344827586208, "grad_norm": 1.2073259353637695, "learning_rate": 2.994712643678161e-05, "loss": 0.2319, "step": 7369 }, { "epoch": 25.413793103448278, "grad_norm": 0.8076019287109375, "learning_rate": 2.994666666666667e-05, "loss": 0.2418, "step": 7370 }, { "epoch": 25.417241379310344, "grad_norm": 0.8939073085784912, "learning_rate": 2.9946206896551724e-05, "loss": 0.2421, "step": 7371 }, { "epoch": 25.420689655172414, "grad_norm": 0.8745260834693909, "learning_rate": 2.994574712643678e-05, "loss": 0.2309, "step": 7372 }, { "epoch": 25.424137931034483, "grad_norm": 0.8811714053153992, "learning_rate": 2.9945287356321842e-05, "loss": 0.237, "step": 7373 }, { "epoch": 25.427586206896553, "grad_norm": 1.9744620323181152, "learning_rate": 2.9944827586206897e-05, "loss": 0.2765, "step": 7374 }, { "epoch": 25.43103448275862, "grad_norm": 2.0728800296783447, "learning_rate": 2.9944367816091956e-05, "loss": 0.3535, "step": 7375 }, { "epoch": 25.43448275862069, "grad_norm": 0.5660450458526611, "learning_rate": 2.994390804597701e-05, "loss": 0.2826, "step": 7376 }, { "epoch": 25.43793103448276, "grad_norm": 0.5579472780227661, "learning_rate": 2.994344827586207e-05, "loss": 0.2747, "step": 7377 }, { "epoch": 25.44137931034483, "grad_norm": 0.5969959497451782, "learning_rate": 2.994298850574713e-05, "loss": 0.2744, "step": 7378 }, { "epoch": 25.444827586206898, "grad_norm": 1.0184921026229858, "learning_rate": 2.9942528735632184e-05, "loss": 0.2423, "step": 7379 }, { "epoch": 25.448275862068964, "grad_norm": 0.5217795372009277, "learning_rate": 2.9942068965517242e-05, "loss": 0.254, "step": 7380 }, { "epoch": 25.451724137931034, "grad_norm": 0.8298112154006958, "learning_rate": 2.99416091954023e-05, "loss": 0.2563, "step": 7381 }, { "epoch": 25.455172413793104, "grad_norm": 0.5835206508636475, "learning_rate": 2.9941149425287356e-05, "loss": 0.2284, "step": 7382 }, { "epoch": 25.458620689655174, "grad_norm": 0.5929776430130005, "learning_rate": 2.9940689655172415e-05, "loss": 0.2706, "step": 7383 }, { "epoch": 25.46206896551724, "grad_norm": 0.552271842956543, "learning_rate": 2.994022988505747e-05, "loss": 0.2345, "step": 7384 }, { "epoch": 25.46551724137931, "grad_norm": 0.9531169533729553, "learning_rate": 2.9939770114942532e-05, "loss": 0.2395, "step": 7385 }, { "epoch": 25.46896551724138, "grad_norm": 1.562746524810791, "learning_rate": 2.9939310344827588e-05, "loss": 0.2703, "step": 7386 }, { "epoch": 25.47241379310345, "grad_norm": 1.2389354705810547, "learning_rate": 2.9938850574712643e-05, "loss": 0.2178, "step": 7387 }, { "epoch": 25.47586206896552, "grad_norm": 0.5261808633804321, "learning_rate": 2.99383908045977e-05, "loss": 0.269, "step": 7388 }, { "epoch": 25.479310344827585, "grad_norm": 0.7868502140045166, "learning_rate": 2.993793103448276e-05, "loss": 0.2592, "step": 7389 }, { "epoch": 25.482758620689655, "grad_norm": 0.5984821915626526, "learning_rate": 2.9937471264367816e-05, "loss": 0.2601, "step": 7390 }, { "epoch": 25.486206896551725, "grad_norm": 0.9140755534172058, "learning_rate": 2.9937011494252874e-05, "loss": 0.2379, "step": 7391 }, { "epoch": 25.489655172413794, "grad_norm": 0.7007191777229309, "learning_rate": 2.993655172413793e-05, "loss": 0.2454, "step": 7392 }, { "epoch": 25.49310344827586, "grad_norm": 1.0220264196395874, "learning_rate": 2.993609195402299e-05, "loss": 0.213, "step": 7393 }, { "epoch": 25.49655172413793, "grad_norm": 0.810256838798523, "learning_rate": 2.9935632183908047e-05, "loss": 0.2294, "step": 7394 }, { "epoch": 25.5, "grad_norm": 0.7349443435668945, "learning_rate": 2.9935172413793102e-05, "loss": 0.2251, "step": 7395 }, { "epoch": 25.50344827586207, "grad_norm": 1.1629854440689087, "learning_rate": 2.993471264367816e-05, "loss": 0.2632, "step": 7396 }, { "epoch": 25.50689655172414, "grad_norm": 1.7502479553222656, "learning_rate": 2.993425287356322e-05, "loss": 0.2379, "step": 7397 }, { "epoch": 25.510344827586206, "grad_norm": 0.9820802807807922, "learning_rate": 2.993379310344828e-05, "loss": 0.2257, "step": 7398 }, { "epoch": 25.513793103448275, "grad_norm": 1.250919222831726, "learning_rate": 2.9933333333333334e-05, "loss": 0.2648, "step": 7399 }, { "epoch": 25.517241379310345, "grad_norm": 1.7139846086502075, "learning_rate": 2.993287356321839e-05, "loss": 0.3033, "step": 7400 }, { "epoch": 25.520689655172415, "grad_norm": 1.3856608867645264, "learning_rate": 2.993241379310345e-05, "loss": 0.349, "step": 7401 }, { "epoch": 25.52413793103448, "grad_norm": 0.6204547882080078, "learning_rate": 2.9931954022988506e-05, "loss": 0.2619, "step": 7402 }, { "epoch": 25.52758620689655, "grad_norm": 0.6777874231338501, "learning_rate": 2.9931494252873565e-05, "loss": 0.2964, "step": 7403 }, { "epoch": 25.53103448275862, "grad_norm": 0.7734145522117615, "learning_rate": 2.993103448275862e-05, "loss": 0.2619, "step": 7404 }, { "epoch": 25.53448275862069, "grad_norm": 0.7935417890548706, "learning_rate": 2.993057471264368e-05, "loss": 0.2704, "step": 7405 }, { "epoch": 25.53793103448276, "grad_norm": 0.6404404044151306, "learning_rate": 2.9930114942528738e-05, "loss": 0.2863, "step": 7406 }, { "epoch": 25.541379310344826, "grad_norm": 0.6421770453453064, "learning_rate": 2.9929655172413793e-05, "loss": 0.2656, "step": 7407 }, { "epoch": 25.544827586206896, "grad_norm": 0.6753381490707397, "learning_rate": 2.992919540229885e-05, "loss": 0.2386, "step": 7408 }, { "epoch": 25.548275862068966, "grad_norm": 1.6597764492034912, "learning_rate": 2.992873563218391e-05, "loss": 0.277, "step": 7409 }, { "epoch": 25.551724137931036, "grad_norm": 0.6672688722610474, "learning_rate": 2.9928275862068966e-05, "loss": 0.2139, "step": 7410 }, { "epoch": 25.555172413793102, "grad_norm": 0.8068071007728577, "learning_rate": 2.9927816091954024e-05, "loss": 0.2347, "step": 7411 }, { "epoch": 25.55862068965517, "grad_norm": 0.9554126858711243, "learning_rate": 2.992735632183908e-05, "loss": 0.274, "step": 7412 }, { "epoch": 25.56206896551724, "grad_norm": 0.7060416340827942, "learning_rate": 2.992689655172414e-05, "loss": 0.25, "step": 7413 }, { "epoch": 25.56551724137931, "grad_norm": 0.7261428236961365, "learning_rate": 2.9926436781609197e-05, "loss": 0.2589, "step": 7414 }, { "epoch": 25.56896551724138, "grad_norm": 0.5980456471443176, "learning_rate": 2.9925977011494252e-05, "loss": 0.2452, "step": 7415 }, { "epoch": 25.572413793103447, "grad_norm": 1.0542794466018677, "learning_rate": 2.992551724137931e-05, "loss": 0.2412, "step": 7416 }, { "epoch": 25.575862068965517, "grad_norm": 1.3378353118896484, "learning_rate": 2.992505747126437e-05, "loss": 0.231, "step": 7417 }, { "epoch": 25.579310344827586, "grad_norm": 0.8478876352310181, "learning_rate": 2.9924597701149425e-05, "loss": 0.2289, "step": 7418 }, { "epoch": 25.582758620689656, "grad_norm": 3.7499783039093018, "learning_rate": 2.9924137931034484e-05, "loss": 0.2392, "step": 7419 }, { "epoch": 25.586206896551722, "grad_norm": 1.1565717458724976, "learning_rate": 2.992367816091954e-05, "loss": 0.2454, "step": 7420 }, { "epoch": 25.589655172413792, "grad_norm": 1.0338526964187622, "learning_rate": 2.99232183908046e-05, "loss": 0.2656, "step": 7421 }, { "epoch": 25.593103448275862, "grad_norm": 1.2412279844284058, "learning_rate": 2.9922758620689656e-05, "loss": 0.2202, "step": 7422 }, { "epoch": 25.59655172413793, "grad_norm": 0.7389034628868103, "learning_rate": 2.992229885057471e-05, "loss": 0.282, "step": 7423 }, { "epoch": 25.6, "grad_norm": 1.7811903953552246, "learning_rate": 2.992183908045977e-05, "loss": 0.2704, "step": 7424 }, { "epoch": 25.603448275862068, "grad_norm": 1.687027096748352, "learning_rate": 2.992137931034483e-05, "loss": 0.3657, "step": 7425 }, { "epoch": 25.606896551724137, "grad_norm": 0.6698805093765259, "learning_rate": 2.9920919540229888e-05, "loss": 0.2881, "step": 7426 }, { "epoch": 25.610344827586207, "grad_norm": 0.6121231317520142, "learning_rate": 2.9920459770114943e-05, "loss": 0.2538, "step": 7427 }, { "epoch": 25.613793103448277, "grad_norm": 0.5622205138206482, "learning_rate": 2.9919999999999998e-05, "loss": 0.2408, "step": 7428 }, { "epoch": 25.617241379310343, "grad_norm": 0.6070341467857361, "learning_rate": 2.991954022988506e-05, "loss": 0.2848, "step": 7429 }, { "epoch": 25.620689655172413, "grad_norm": 0.6392623782157898, "learning_rate": 2.9919080459770116e-05, "loss": 0.2865, "step": 7430 }, { "epoch": 25.624137931034483, "grad_norm": 1.3563575744628906, "learning_rate": 2.9918620689655174e-05, "loss": 0.2553, "step": 7431 }, { "epoch": 25.627586206896552, "grad_norm": 0.7436568140983582, "learning_rate": 2.991816091954023e-05, "loss": 0.2633, "step": 7432 }, { "epoch": 25.631034482758622, "grad_norm": 0.618407130241394, "learning_rate": 2.9917701149425288e-05, "loss": 0.2521, "step": 7433 }, { "epoch": 25.63448275862069, "grad_norm": 0.5986953377723694, "learning_rate": 2.9917241379310347e-05, "loss": 0.2542, "step": 7434 }, { "epoch": 25.637931034482758, "grad_norm": 0.6466574668884277, "learning_rate": 2.9916781609195402e-05, "loss": 0.2323, "step": 7435 }, { "epoch": 25.641379310344828, "grad_norm": 0.9068517088890076, "learning_rate": 2.991632183908046e-05, "loss": 0.2249, "step": 7436 }, { "epoch": 25.644827586206898, "grad_norm": 0.6739405989646912, "learning_rate": 2.991586206896552e-05, "loss": 0.25, "step": 7437 }, { "epoch": 25.648275862068964, "grad_norm": 1.3640192747116089, "learning_rate": 2.9915402298850575e-05, "loss": 0.2451, "step": 7438 }, { "epoch": 25.651724137931033, "grad_norm": 0.6568501591682434, "learning_rate": 2.9914942528735633e-05, "loss": 0.2219, "step": 7439 }, { "epoch": 25.655172413793103, "grad_norm": 0.7429285645484924, "learning_rate": 2.991448275862069e-05, "loss": 0.2288, "step": 7440 }, { "epoch": 25.658620689655173, "grad_norm": 0.637138843536377, "learning_rate": 2.991402298850575e-05, "loss": 0.2021, "step": 7441 }, { "epoch": 25.662068965517243, "grad_norm": 0.7281420230865479, "learning_rate": 2.9913563218390806e-05, "loss": 0.2181, "step": 7442 }, { "epoch": 25.66551724137931, "grad_norm": 1.0037997961044312, "learning_rate": 2.991310344827586e-05, "loss": 0.2268, "step": 7443 }, { "epoch": 25.66896551724138, "grad_norm": 1.1482863426208496, "learning_rate": 2.991264367816092e-05, "loss": 0.2313, "step": 7444 }, { "epoch": 25.67241379310345, "grad_norm": 1.001513123512268, "learning_rate": 2.991218390804598e-05, "loss": 0.2482, "step": 7445 }, { "epoch": 25.675862068965518, "grad_norm": 1.7707738876342773, "learning_rate": 2.9911724137931034e-05, "loss": 0.2058, "step": 7446 }, { "epoch": 25.679310344827588, "grad_norm": 1.010115385055542, "learning_rate": 2.9911264367816093e-05, "loss": 0.2339, "step": 7447 }, { "epoch": 25.682758620689654, "grad_norm": 0.8416163921356201, "learning_rate": 2.9910804597701148e-05, "loss": 0.2557, "step": 7448 }, { "epoch": 25.686206896551724, "grad_norm": 1.645012378692627, "learning_rate": 2.991034482758621e-05, "loss": 0.2399, "step": 7449 }, { "epoch": 25.689655172413794, "grad_norm": 1.6244795322418213, "learning_rate": 2.9909885057471265e-05, "loss": 0.292, "step": 7450 }, { "epoch": 25.693103448275863, "grad_norm": 0.6191885471343994, "learning_rate": 2.990942528735632e-05, "loss": 0.2899, "step": 7451 }, { "epoch": 25.69655172413793, "grad_norm": 0.7414783239364624, "learning_rate": 2.990896551724138e-05, "loss": 0.3237, "step": 7452 }, { "epoch": 25.7, "grad_norm": 0.7280526161193848, "learning_rate": 2.9908505747126438e-05, "loss": 0.2601, "step": 7453 }, { "epoch": 25.70344827586207, "grad_norm": 0.6611649394035339, "learning_rate": 2.9908045977011497e-05, "loss": 0.2333, "step": 7454 }, { "epoch": 25.70689655172414, "grad_norm": 0.49724867939949036, "learning_rate": 2.9907586206896552e-05, "loss": 0.2423, "step": 7455 }, { "epoch": 25.71034482758621, "grad_norm": 0.7046584486961365, "learning_rate": 2.9907126436781607e-05, "loss": 0.238, "step": 7456 }, { "epoch": 25.713793103448275, "grad_norm": 1.5892239809036255, "learning_rate": 2.990666666666667e-05, "loss": 0.2617, "step": 7457 }, { "epoch": 25.717241379310344, "grad_norm": 1.319098711013794, "learning_rate": 2.9906206896551725e-05, "loss": 0.2551, "step": 7458 }, { "epoch": 25.720689655172414, "grad_norm": 0.6186444163322449, "learning_rate": 2.9905747126436783e-05, "loss": 0.245, "step": 7459 }, { "epoch": 25.724137931034484, "grad_norm": 0.9873375296592712, "learning_rate": 2.990528735632184e-05, "loss": 0.2379, "step": 7460 }, { "epoch": 25.72758620689655, "grad_norm": 0.8676879405975342, "learning_rate": 2.9904827586206897e-05, "loss": 0.2267, "step": 7461 }, { "epoch": 25.73103448275862, "grad_norm": 0.9580457806587219, "learning_rate": 2.9904367816091956e-05, "loss": 0.2501, "step": 7462 }, { "epoch": 25.73448275862069, "grad_norm": 0.6865968704223633, "learning_rate": 2.990390804597701e-05, "loss": 0.2304, "step": 7463 }, { "epoch": 25.73793103448276, "grad_norm": 0.5521945357322693, "learning_rate": 2.990344827586207e-05, "loss": 0.2253, "step": 7464 }, { "epoch": 25.74137931034483, "grad_norm": 1.2025017738342285, "learning_rate": 2.990298850574713e-05, "loss": 0.2704, "step": 7465 }, { "epoch": 25.744827586206895, "grad_norm": 0.7577502131462097, "learning_rate": 2.9902528735632184e-05, "loss": 0.228, "step": 7466 }, { "epoch": 25.748275862068965, "grad_norm": 1.2410584688186646, "learning_rate": 2.9902068965517243e-05, "loss": 0.2222, "step": 7467 }, { "epoch": 25.751724137931035, "grad_norm": 1.0798039436340332, "learning_rate": 2.9901609195402298e-05, "loss": 0.2435, "step": 7468 }, { "epoch": 25.755172413793105, "grad_norm": 1.3857407569885254, "learning_rate": 2.990114942528736e-05, "loss": 0.2243, "step": 7469 }, { "epoch": 25.75862068965517, "grad_norm": 1.4243241548538208, "learning_rate": 2.9900689655172415e-05, "loss": 0.2289, "step": 7470 }, { "epoch": 25.76206896551724, "grad_norm": 0.8145096302032471, "learning_rate": 2.990022988505747e-05, "loss": 0.263, "step": 7471 }, { "epoch": 25.76551724137931, "grad_norm": 0.9644615650177002, "learning_rate": 2.989977011494253e-05, "loss": 0.2241, "step": 7472 }, { "epoch": 25.76896551724138, "grad_norm": 1.0390770435333252, "learning_rate": 2.9899310344827588e-05, "loss": 0.2491, "step": 7473 }, { "epoch": 25.77241379310345, "grad_norm": 1.057323694229126, "learning_rate": 2.9898850574712647e-05, "loss": 0.2773, "step": 7474 }, { "epoch": 25.775862068965516, "grad_norm": 3.3859081268310547, "learning_rate": 2.9898390804597702e-05, "loss": 0.3259, "step": 7475 }, { "epoch": 25.779310344827586, "grad_norm": 0.6130096316337585, "learning_rate": 2.9897931034482757e-05, "loss": 0.3114, "step": 7476 }, { "epoch": 25.782758620689656, "grad_norm": 0.5618555545806885, "learning_rate": 2.989747126436782e-05, "loss": 0.2386, "step": 7477 }, { "epoch": 25.786206896551725, "grad_norm": 0.7367674708366394, "learning_rate": 2.9897011494252875e-05, "loss": 0.2669, "step": 7478 }, { "epoch": 25.78965517241379, "grad_norm": 1.0917117595672607, "learning_rate": 2.989655172413793e-05, "loss": 0.2505, "step": 7479 }, { "epoch": 25.79310344827586, "grad_norm": 0.6566345691680908, "learning_rate": 2.989609195402299e-05, "loss": 0.2222, "step": 7480 }, { "epoch": 25.79655172413793, "grad_norm": 1.2988420724868774, "learning_rate": 2.9895632183908047e-05, "loss": 0.2307, "step": 7481 }, { "epoch": 25.8, "grad_norm": 2.4093573093414307, "learning_rate": 2.9895172413793106e-05, "loss": 0.2604, "step": 7482 }, { "epoch": 25.80344827586207, "grad_norm": 1.3337483406066895, "learning_rate": 2.989471264367816e-05, "loss": 0.2765, "step": 7483 }, { "epoch": 25.806896551724137, "grad_norm": 0.6971061825752258, "learning_rate": 2.9894252873563217e-05, "loss": 0.2382, "step": 7484 }, { "epoch": 25.810344827586206, "grad_norm": 1.296338677406311, "learning_rate": 2.989379310344828e-05, "loss": 0.2509, "step": 7485 }, { "epoch": 25.813793103448276, "grad_norm": 0.8192517757415771, "learning_rate": 2.9893333333333334e-05, "loss": 0.2285, "step": 7486 }, { "epoch": 25.817241379310346, "grad_norm": 0.6008008122444153, "learning_rate": 2.9892873563218393e-05, "loss": 0.2524, "step": 7487 }, { "epoch": 25.820689655172412, "grad_norm": 0.9987444281578064, "learning_rate": 2.9892413793103448e-05, "loss": 0.2204, "step": 7488 }, { "epoch": 25.824137931034482, "grad_norm": 1.0814595222473145, "learning_rate": 2.9891954022988507e-05, "loss": 0.2439, "step": 7489 }, { "epoch": 25.82758620689655, "grad_norm": 0.6372995376586914, "learning_rate": 2.9891494252873565e-05, "loss": 0.2543, "step": 7490 }, { "epoch": 25.83103448275862, "grad_norm": 0.7338698506355286, "learning_rate": 2.989103448275862e-05, "loss": 0.2376, "step": 7491 }, { "epoch": 25.83448275862069, "grad_norm": 0.7930976748466492, "learning_rate": 2.989057471264368e-05, "loss": 0.2263, "step": 7492 }, { "epoch": 25.837931034482757, "grad_norm": 0.9615504741668701, "learning_rate": 2.9890114942528738e-05, "loss": 0.2592, "step": 7493 }, { "epoch": 25.841379310344827, "grad_norm": 1.2826151847839355, "learning_rate": 2.9889655172413793e-05, "loss": 0.257, "step": 7494 }, { "epoch": 25.844827586206897, "grad_norm": 0.969818115234375, "learning_rate": 2.9889195402298852e-05, "loss": 0.2259, "step": 7495 }, { "epoch": 25.848275862068967, "grad_norm": 1.5938169956207275, "learning_rate": 2.9888735632183907e-05, "loss": 0.2202, "step": 7496 }, { "epoch": 25.851724137931036, "grad_norm": 1.377209186553955, "learning_rate": 2.988827586206897e-05, "loss": 0.2276, "step": 7497 }, { "epoch": 25.855172413793102, "grad_norm": 1.2613543272018433, "learning_rate": 2.9887816091954025e-05, "loss": 0.2012, "step": 7498 }, { "epoch": 25.858620689655172, "grad_norm": 1.3427752256393433, "learning_rate": 2.988735632183908e-05, "loss": 0.246, "step": 7499 }, { "epoch": 25.862068965517242, "grad_norm": 6.46544075012207, "learning_rate": 2.988689655172414e-05, "loss": 0.2998, "step": 7500 }, { "epoch": 25.86551724137931, "grad_norm": 1.090933084487915, "learning_rate": 2.9886436781609197e-05, "loss": 0.357, "step": 7501 }, { "epoch": 25.868965517241378, "grad_norm": 0.7554348111152649, "learning_rate": 2.9885977011494256e-05, "loss": 0.2796, "step": 7502 }, { "epoch": 25.872413793103448, "grad_norm": 0.9908784031867981, "learning_rate": 2.988551724137931e-05, "loss": 0.2966, "step": 7503 }, { "epoch": 25.875862068965517, "grad_norm": 0.9906633496284485, "learning_rate": 2.9885057471264367e-05, "loss": 0.2826, "step": 7504 }, { "epoch": 25.879310344827587, "grad_norm": 0.7973964810371399, "learning_rate": 2.988459770114943e-05, "loss": 0.2803, "step": 7505 }, { "epoch": 25.882758620689657, "grad_norm": 0.6287744045257568, "learning_rate": 2.9884137931034484e-05, "loss": 0.2365, "step": 7506 }, { "epoch": 25.886206896551723, "grad_norm": 0.6175853610038757, "learning_rate": 2.988367816091954e-05, "loss": 0.2581, "step": 7507 }, { "epoch": 25.889655172413793, "grad_norm": 0.8366554379463196, "learning_rate": 2.9883218390804598e-05, "loss": 0.2237, "step": 7508 }, { "epoch": 25.893103448275863, "grad_norm": 0.7913941144943237, "learning_rate": 2.9882758620689657e-05, "loss": 0.2564, "step": 7509 }, { "epoch": 25.896551724137932, "grad_norm": 0.9610850811004639, "learning_rate": 2.9882298850574715e-05, "loss": 0.2257, "step": 7510 }, { "epoch": 25.9, "grad_norm": 1.010304570198059, "learning_rate": 2.988183908045977e-05, "loss": 0.2945, "step": 7511 }, { "epoch": 25.90344827586207, "grad_norm": 0.7622476816177368, "learning_rate": 2.9881379310344826e-05, "loss": 0.2185, "step": 7512 }, { "epoch": 25.906896551724138, "grad_norm": 0.9940056800842285, "learning_rate": 2.9880919540229888e-05, "loss": 0.2352, "step": 7513 }, { "epoch": 25.910344827586208, "grad_norm": 1.0321284532546997, "learning_rate": 2.9880459770114943e-05, "loss": 0.259, "step": 7514 }, { "epoch": 25.913793103448278, "grad_norm": 0.704287052154541, "learning_rate": 2.9880000000000002e-05, "loss": 0.2326, "step": 7515 }, { "epoch": 25.917241379310344, "grad_norm": 0.6976174116134644, "learning_rate": 2.9879540229885057e-05, "loss": 0.2431, "step": 7516 }, { "epoch": 25.920689655172414, "grad_norm": 1.1354167461395264, "learning_rate": 2.9879080459770116e-05, "loss": 0.2333, "step": 7517 }, { "epoch": 25.924137931034483, "grad_norm": 1.4783886671066284, "learning_rate": 2.9878620689655175e-05, "loss": 0.2076, "step": 7518 }, { "epoch": 25.927586206896553, "grad_norm": 1.839263916015625, "learning_rate": 2.987816091954023e-05, "loss": 0.2465, "step": 7519 }, { "epoch": 25.93103448275862, "grad_norm": 1.6701362133026123, "learning_rate": 2.987770114942529e-05, "loss": 0.2804, "step": 7520 }, { "epoch": 25.93448275862069, "grad_norm": 1.1459345817565918, "learning_rate": 2.9877241379310347e-05, "loss": 0.2666, "step": 7521 }, { "epoch": 25.93793103448276, "grad_norm": 0.8767188787460327, "learning_rate": 2.9876781609195403e-05, "loss": 0.2419, "step": 7522 }, { "epoch": 25.94137931034483, "grad_norm": 0.7385971546173096, "learning_rate": 2.987632183908046e-05, "loss": 0.2332, "step": 7523 }, { "epoch": 25.944827586206898, "grad_norm": 3.460210084915161, "learning_rate": 2.9875862068965517e-05, "loss": 0.2965, "step": 7524 }, { "epoch": 25.948275862068964, "grad_norm": 1.40106999874115, "learning_rate": 2.987540229885058e-05, "loss": 0.3554, "step": 7525 }, { "epoch": 25.951724137931034, "grad_norm": 1.3402115106582642, "learning_rate": 2.9874942528735634e-05, "loss": 0.2838, "step": 7526 }, { "epoch": 25.955172413793104, "grad_norm": 1.0346088409423828, "learning_rate": 2.987448275862069e-05, "loss": 0.2376, "step": 7527 }, { "epoch": 25.958620689655174, "grad_norm": 0.4962213933467865, "learning_rate": 2.9874022988505748e-05, "loss": 0.2612, "step": 7528 }, { "epoch": 25.96206896551724, "grad_norm": 0.6978166103363037, "learning_rate": 2.9873563218390807e-05, "loss": 0.2673, "step": 7529 }, { "epoch": 25.96551724137931, "grad_norm": 0.6065405607223511, "learning_rate": 2.9873103448275865e-05, "loss": 0.2242, "step": 7530 }, { "epoch": 25.96896551724138, "grad_norm": 0.8265593647956848, "learning_rate": 2.987264367816092e-05, "loss": 0.2241, "step": 7531 }, { "epoch": 25.97241379310345, "grad_norm": 0.6630332469940186, "learning_rate": 2.9872183908045976e-05, "loss": 0.229, "step": 7532 }, { "epoch": 25.97586206896552, "grad_norm": 0.8987392783164978, "learning_rate": 2.9871724137931038e-05, "loss": 0.2396, "step": 7533 }, { "epoch": 25.979310344827585, "grad_norm": 2.0223844051361084, "learning_rate": 2.9871264367816093e-05, "loss": 0.2478, "step": 7534 }, { "epoch": 25.982758620689655, "grad_norm": 1.164928674697876, "learning_rate": 2.987080459770115e-05, "loss": 0.2322, "step": 7535 }, { "epoch": 25.986206896551725, "grad_norm": 1.0603328943252563, "learning_rate": 2.9870344827586207e-05, "loss": 0.2646, "step": 7536 }, { "epoch": 25.989655172413794, "grad_norm": 1.5101172924041748, "learning_rate": 2.9869885057471266e-05, "loss": 0.2392, "step": 7537 }, { "epoch": 25.99310344827586, "grad_norm": 1.992369294166565, "learning_rate": 2.9869425287356325e-05, "loss": 0.2613, "step": 7538 }, { "epoch": 25.99655172413793, "grad_norm": 4.581329345703125, "learning_rate": 2.986896551724138e-05, "loss": 0.2636, "step": 7539 }, { "epoch": 26.0, "grad_norm": 2.833204507827759, "learning_rate": 2.9868505747126435e-05, "loss": 0.282, "step": 7540 }, { "epoch": 26.00344827586207, "grad_norm": 0.862352192401886, "learning_rate": 2.9868045977011497e-05, "loss": 0.2929, "step": 7541 }, { "epoch": 26.00689655172414, "grad_norm": 0.6740588545799255, "learning_rate": 2.9867586206896552e-05, "loss": 0.249, "step": 7542 }, { "epoch": 26.010344827586206, "grad_norm": 0.6449402570724487, "learning_rate": 2.986712643678161e-05, "loss": 0.2604, "step": 7543 }, { "epoch": 26.013793103448275, "grad_norm": 0.8428400158882141, "learning_rate": 2.9866666666666666e-05, "loss": 0.2567, "step": 7544 }, { "epoch": 26.017241379310345, "grad_norm": 0.4962921142578125, "learning_rate": 2.9866206896551725e-05, "loss": 0.2408, "step": 7545 }, { "epoch": 26.020689655172415, "grad_norm": 1.0060112476348877, "learning_rate": 2.9865747126436784e-05, "loss": 0.2745, "step": 7546 }, { "epoch": 26.02413793103448, "grad_norm": 0.7610726952552795, "learning_rate": 2.986528735632184e-05, "loss": 0.2651, "step": 7547 }, { "epoch": 26.02758620689655, "grad_norm": 0.7193765044212341, "learning_rate": 2.9864827586206898e-05, "loss": 0.2323, "step": 7548 }, { "epoch": 26.03103448275862, "grad_norm": 0.6602351665496826, "learning_rate": 2.9864367816091956e-05, "loss": 0.2418, "step": 7549 }, { "epoch": 26.03448275862069, "grad_norm": 1.152506947517395, "learning_rate": 2.9863908045977012e-05, "loss": 0.2468, "step": 7550 }, { "epoch": 26.03793103448276, "grad_norm": 0.9209145307540894, "learning_rate": 2.986344827586207e-05, "loss": 0.2604, "step": 7551 }, { "epoch": 26.041379310344826, "grad_norm": 1.007346272468567, "learning_rate": 2.9862988505747126e-05, "loss": 0.2462, "step": 7552 }, { "epoch": 26.044827586206896, "grad_norm": 1.2147691249847412, "learning_rate": 2.9862528735632188e-05, "loss": 0.2278, "step": 7553 }, { "epoch": 26.048275862068966, "grad_norm": 0.7525991201400757, "learning_rate": 2.9862068965517243e-05, "loss": 0.2235, "step": 7554 }, { "epoch": 26.051724137931036, "grad_norm": 0.8393265604972839, "learning_rate": 2.98616091954023e-05, "loss": 0.2257, "step": 7555 }, { "epoch": 26.055172413793102, "grad_norm": 1.256007432937622, "learning_rate": 2.9861149425287357e-05, "loss": 0.2498, "step": 7556 }, { "epoch": 26.05862068965517, "grad_norm": 1.5076701641082764, "learning_rate": 2.9860689655172412e-05, "loss": 0.2211, "step": 7557 }, { "epoch": 26.06206896551724, "grad_norm": 0.9761775732040405, "learning_rate": 2.9860229885057474e-05, "loss": 0.2323, "step": 7558 }, { "epoch": 26.06551724137931, "grad_norm": 1.0649349689483643, "learning_rate": 2.985977011494253e-05, "loss": 0.2331, "step": 7559 }, { "epoch": 26.06896551724138, "grad_norm": 0.7487540245056152, "learning_rate": 2.9859310344827585e-05, "loss": 0.2122, "step": 7560 }, { "epoch": 26.072413793103447, "grad_norm": 1.5603375434875488, "learning_rate": 2.9858850574712644e-05, "loss": 0.2249, "step": 7561 }, { "epoch": 26.075862068965517, "grad_norm": 0.8875274062156677, "learning_rate": 2.9858390804597702e-05, "loss": 0.2203, "step": 7562 }, { "epoch": 26.079310344827586, "grad_norm": 1.911452054977417, "learning_rate": 2.985793103448276e-05, "loss": 0.2344, "step": 7563 }, { "epoch": 26.082758620689656, "grad_norm": 1.6938447952270508, "learning_rate": 2.9857471264367816e-05, "loss": 0.2278, "step": 7564 }, { "epoch": 26.086206896551722, "grad_norm": 1.9909632205963135, "learning_rate": 2.985701149425287e-05, "loss": 0.2806, "step": 7565 }, { "epoch": 26.089655172413792, "grad_norm": 0.7071835398674011, "learning_rate": 2.9856551724137934e-05, "loss": 0.3367, "step": 7566 }, { "epoch": 26.093103448275862, "grad_norm": 0.8579555749893188, "learning_rate": 2.985609195402299e-05, "loss": 0.2832, "step": 7567 }, { "epoch": 26.09655172413793, "grad_norm": 0.6627320647239685, "learning_rate": 2.9855632183908044e-05, "loss": 0.2592, "step": 7568 }, { "epoch": 26.1, "grad_norm": 0.7814183831214905, "learning_rate": 2.9855172413793103e-05, "loss": 0.2652, "step": 7569 }, { "epoch": 26.103448275862068, "grad_norm": 0.9820364713668823, "learning_rate": 2.9854712643678162e-05, "loss": 0.2813, "step": 7570 }, { "epoch": 26.106896551724137, "grad_norm": 0.7989264726638794, "learning_rate": 2.985425287356322e-05, "loss": 0.2432, "step": 7571 }, { "epoch": 26.110344827586207, "grad_norm": 0.8927903771400452, "learning_rate": 2.9853793103448276e-05, "loss": 0.2344, "step": 7572 }, { "epoch": 26.113793103448277, "grad_norm": 0.943806529045105, "learning_rate": 2.985333333333333e-05, "loss": 0.2384, "step": 7573 }, { "epoch": 26.117241379310343, "grad_norm": 1.4379924535751343, "learning_rate": 2.9852873563218393e-05, "loss": 0.2347, "step": 7574 }, { "epoch": 26.120689655172413, "grad_norm": 1.0179270505905151, "learning_rate": 2.985241379310345e-05, "loss": 0.2409, "step": 7575 }, { "epoch": 26.124137931034483, "grad_norm": 1.0386557579040527, "learning_rate": 2.9851954022988507e-05, "loss": 0.2206, "step": 7576 }, { "epoch": 26.127586206896552, "grad_norm": 0.9568405151367188, "learning_rate": 2.9851494252873562e-05, "loss": 0.247, "step": 7577 }, { "epoch": 26.131034482758622, "grad_norm": 0.884819507598877, "learning_rate": 2.985103448275862e-05, "loss": 0.2323, "step": 7578 }, { "epoch": 26.13448275862069, "grad_norm": 1.2702484130859375, "learning_rate": 2.985057471264368e-05, "loss": 0.2496, "step": 7579 }, { "epoch": 26.137931034482758, "grad_norm": 1.2263050079345703, "learning_rate": 2.9850114942528735e-05, "loss": 0.2387, "step": 7580 }, { "epoch": 26.141379310344828, "grad_norm": 1.0871485471725464, "learning_rate": 2.9849655172413794e-05, "loss": 0.223, "step": 7581 }, { "epoch": 26.144827586206898, "grad_norm": 0.828565776348114, "learning_rate": 2.9849195402298852e-05, "loss": 0.2322, "step": 7582 }, { "epoch": 26.148275862068967, "grad_norm": 1.1681389808654785, "learning_rate": 2.9848735632183908e-05, "loss": 0.2682, "step": 7583 }, { "epoch": 26.151724137931033, "grad_norm": 1.1894543170928955, "learning_rate": 2.9848275862068966e-05, "loss": 0.2215, "step": 7584 }, { "epoch": 26.155172413793103, "grad_norm": 1.8360000848770142, "learning_rate": 2.984781609195402e-05, "loss": 0.2204, "step": 7585 }, { "epoch": 26.158620689655173, "grad_norm": 1.0672725439071655, "learning_rate": 2.9847356321839084e-05, "loss": 0.2452, "step": 7586 }, { "epoch": 26.162068965517243, "grad_norm": 0.8687919974327087, "learning_rate": 2.984689655172414e-05, "loss": 0.238, "step": 7587 }, { "epoch": 26.16551724137931, "grad_norm": 0.8341078162193298, "learning_rate": 2.9846436781609194e-05, "loss": 0.2205, "step": 7588 }, { "epoch": 26.16896551724138, "grad_norm": 0.9983385801315308, "learning_rate": 2.9845977011494253e-05, "loss": 0.2801, "step": 7589 }, { "epoch": 26.17241379310345, "grad_norm": 1.1600241661071777, "learning_rate": 2.984551724137931e-05, "loss": 0.3292, "step": 7590 }, { "epoch": 26.175862068965518, "grad_norm": 0.6308494806289673, "learning_rate": 2.984505747126437e-05, "loss": 0.2947, "step": 7591 }, { "epoch": 26.179310344827588, "grad_norm": 0.4973528981208801, "learning_rate": 2.9844597701149426e-05, "loss": 0.2488, "step": 7592 }, { "epoch": 26.182758620689654, "grad_norm": 0.6360438466072083, "learning_rate": 2.984413793103448e-05, "loss": 0.2432, "step": 7593 }, { "epoch": 26.186206896551724, "grad_norm": 0.8621991276741028, "learning_rate": 2.9843678160919543e-05, "loss": 0.2264, "step": 7594 }, { "epoch": 26.189655172413794, "grad_norm": 1.1523942947387695, "learning_rate": 2.9843218390804598e-05, "loss": 0.2388, "step": 7595 }, { "epoch": 26.193103448275863, "grad_norm": 1.6003023386001587, "learning_rate": 2.9842758620689654e-05, "loss": 0.233, "step": 7596 }, { "epoch": 26.19655172413793, "grad_norm": 0.6150866150856018, "learning_rate": 2.9842298850574712e-05, "loss": 0.2525, "step": 7597 }, { "epoch": 26.2, "grad_norm": 0.7697067260742188, "learning_rate": 2.984183908045977e-05, "loss": 0.2617, "step": 7598 }, { "epoch": 26.20344827586207, "grad_norm": 1.0442687273025513, "learning_rate": 2.984137931034483e-05, "loss": 0.2481, "step": 7599 }, { "epoch": 26.20689655172414, "grad_norm": 0.7634774446487427, "learning_rate": 2.9840919540229885e-05, "loss": 0.2486, "step": 7600 }, { "epoch": 26.21034482758621, "grad_norm": 0.9284417629241943, "learning_rate": 2.984045977011494e-05, "loss": 0.2392, "step": 7601 }, { "epoch": 26.213793103448275, "grad_norm": 0.5680017471313477, "learning_rate": 2.9840000000000002e-05, "loss": 0.235, "step": 7602 }, { "epoch": 26.217241379310344, "grad_norm": 1.072055459022522, "learning_rate": 2.9839540229885058e-05, "loss": 0.2615, "step": 7603 }, { "epoch": 26.220689655172414, "grad_norm": 1.0764169692993164, "learning_rate": 2.9839080459770116e-05, "loss": 0.2406, "step": 7604 }, { "epoch": 26.224137931034484, "grad_norm": 0.9168227910995483, "learning_rate": 2.983862068965517e-05, "loss": 0.2452, "step": 7605 }, { "epoch": 26.22758620689655, "grad_norm": 0.7636924982070923, "learning_rate": 2.983816091954023e-05, "loss": 0.236, "step": 7606 }, { "epoch": 26.23103448275862, "grad_norm": 0.8848972320556641, "learning_rate": 2.983770114942529e-05, "loss": 0.2338, "step": 7607 }, { "epoch": 26.23448275862069, "grad_norm": 0.5718703269958496, "learning_rate": 2.9837241379310344e-05, "loss": 0.2269, "step": 7608 }, { "epoch": 26.23793103448276, "grad_norm": 1.486546277999878, "learning_rate": 2.9836781609195403e-05, "loss": 0.2345, "step": 7609 }, { "epoch": 26.24137931034483, "grad_norm": 1.6755867004394531, "learning_rate": 2.983632183908046e-05, "loss": 0.2158, "step": 7610 }, { "epoch": 26.244827586206895, "grad_norm": 1.2255754470825195, "learning_rate": 2.9835862068965517e-05, "loss": 0.2148, "step": 7611 }, { "epoch": 26.248275862068965, "grad_norm": 0.9802094101905823, "learning_rate": 2.9835402298850576e-05, "loss": 0.2349, "step": 7612 }, { "epoch": 26.251724137931035, "grad_norm": 1.6429669857025146, "learning_rate": 2.983494252873563e-05, "loss": 0.2241, "step": 7613 }, { "epoch": 26.255172413793105, "grad_norm": 0.9954785704612732, "learning_rate": 2.9834482758620693e-05, "loss": 0.2342, "step": 7614 }, { "epoch": 26.25862068965517, "grad_norm": 1.73861563205719, "learning_rate": 2.9834022988505748e-05, "loss": 0.3018, "step": 7615 }, { "epoch": 26.26206896551724, "grad_norm": 0.6885557174682617, "learning_rate": 2.9833563218390804e-05, "loss": 0.2887, "step": 7616 }, { "epoch": 26.26551724137931, "grad_norm": 0.5955564379692078, "learning_rate": 2.9833103448275862e-05, "loss": 0.2625, "step": 7617 }, { "epoch": 26.26896551724138, "grad_norm": 0.6750549077987671, "learning_rate": 2.983264367816092e-05, "loss": 0.2726, "step": 7618 }, { "epoch": 26.27241379310345, "grad_norm": 0.7428629398345947, "learning_rate": 2.983218390804598e-05, "loss": 0.2445, "step": 7619 }, { "epoch": 26.275862068965516, "grad_norm": 0.548012912273407, "learning_rate": 2.9831724137931035e-05, "loss": 0.2354, "step": 7620 }, { "epoch": 26.279310344827586, "grad_norm": 0.5931244492530823, "learning_rate": 2.983126436781609e-05, "loss": 0.2482, "step": 7621 }, { "epoch": 26.282758620689656, "grad_norm": 0.7713026404380798, "learning_rate": 2.9830804597701152e-05, "loss": 0.264, "step": 7622 }, { "epoch": 26.286206896551725, "grad_norm": 2.442650079727173, "learning_rate": 2.9830344827586208e-05, "loss": 0.2495, "step": 7623 }, { "epoch": 26.28965517241379, "grad_norm": 1.5892618894577026, "learning_rate": 2.9829885057471263e-05, "loss": 0.2477, "step": 7624 }, { "epoch": 26.29310344827586, "grad_norm": 1.4027010202407837, "learning_rate": 2.982942528735632e-05, "loss": 0.2567, "step": 7625 }, { "epoch": 26.29655172413793, "grad_norm": 1.1220465898513794, "learning_rate": 2.982896551724138e-05, "loss": 0.2342, "step": 7626 }, { "epoch": 26.3, "grad_norm": 0.6972109079360962, "learning_rate": 2.982850574712644e-05, "loss": 0.246, "step": 7627 }, { "epoch": 26.30344827586207, "grad_norm": 1.1577388048171997, "learning_rate": 2.9828045977011494e-05, "loss": 0.2399, "step": 7628 }, { "epoch": 26.306896551724137, "grad_norm": 0.547025740146637, "learning_rate": 2.982758620689655e-05, "loss": 0.2362, "step": 7629 }, { "epoch": 26.310344827586206, "grad_norm": 0.6976193189620972, "learning_rate": 2.982712643678161e-05, "loss": 0.2183, "step": 7630 }, { "epoch": 26.313793103448276, "grad_norm": 0.5744312405586243, "learning_rate": 2.9826666666666667e-05, "loss": 0.2013, "step": 7631 }, { "epoch": 26.317241379310346, "grad_norm": 0.558634877204895, "learning_rate": 2.9826206896551726e-05, "loss": 0.2108, "step": 7632 }, { "epoch": 26.320689655172412, "grad_norm": 1.2371819019317627, "learning_rate": 2.982574712643678e-05, "loss": 0.1955, "step": 7633 }, { "epoch": 26.324137931034482, "grad_norm": 1.2905091047286987, "learning_rate": 2.982528735632184e-05, "loss": 0.222, "step": 7634 }, { "epoch": 26.32758620689655, "grad_norm": 0.831479012966156, "learning_rate": 2.9824827586206898e-05, "loss": 0.2361, "step": 7635 }, { "epoch": 26.33103448275862, "grad_norm": 1.4061635732650757, "learning_rate": 2.9824367816091953e-05, "loss": 0.218, "step": 7636 }, { "epoch": 26.33448275862069, "grad_norm": 0.6816348433494568, "learning_rate": 2.9823908045977012e-05, "loss": 0.2286, "step": 7637 }, { "epoch": 26.337931034482757, "grad_norm": 0.9115190505981445, "learning_rate": 2.982344827586207e-05, "loss": 0.2388, "step": 7638 }, { "epoch": 26.341379310344827, "grad_norm": 1.663245677947998, "learning_rate": 2.9822988505747126e-05, "loss": 0.2325, "step": 7639 }, { "epoch": 26.344827586206897, "grad_norm": 1.319451928138733, "learning_rate": 2.9822528735632185e-05, "loss": 0.3153, "step": 7640 }, { "epoch": 26.348275862068967, "grad_norm": 0.5125084519386292, "learning_rate": 2.982206896551724e-05, "loss": 0.3103, "step": 7641 }, { "epoch": 26.351724137931033, "grad_norm": 0.7766849398612976, "learning_rate": 2.9821609195402302e-05, "loss": 0.2554, "step": 7642 }, { "epoch": 26.355172413793102, "grad_norm": 0.8748714327812195, "learning_rate": 2.9821149425287357e-05, "loss": 0.2459, "step": 7643 }, { "epoch": 26.358620689655172, "grad_norm": 1.8122694492340088, "learning_rate": 2.9820689655172413e-05, "loss": 0.2312, "step": 7644 }, { "epoch": 26.362068965517242, "grad_norm": 0.66720050573349, "learning_rate": 2.982022988505747e-05, "loss": 0.2563, "step": 7645 }, { "epoch": 26.36551724137931, "grad_norm": 1.3217238187789917, "learning_rate": 2.981977011494253e-05, "loss": 0.2442, "step": 7646 }, { "epoch": 26.368965517241378, "grad_norm": 1.1271308660507202, "learning_rate": 2.981931034482759e-05, "loss": 0.2356, "step": 7647 }, { "epoch": 26.372413793103448, "grad_norm": 0.8820443153381348, "learning_rate": 2.9818850574712644e-05, "loss": 0.2519, "step": 7648 }, { "epoch": 26.375862068965517, "grad_norm": 0.5457468032836914, "learning_rate": 2.98183908045977e-05, "loss": 0.231, "step": 7649 }, { "epoch": 26.379310344827587, "grad_norm": 1.0055410861968994, "learning_rate": 2.981793103448276e-05, "loss": 0.23, "step": 7650 }, { "epoch": 26.382758620689657, "grad_norm": 0.9447782039642334, "learning_rate": 2.9817471264367817e-05, "loss": 0.2402, "step": 7651 }, { "epoch": 26.386206896551723, "grad_norm": 0.8335046768188477, "learning_rate": 2.9817011494252875e-05, "loss": 0.2467, "step": 7652 }, { "epoch": 26.389655172413793, "grad_norm": 0.8779665231704712, "learning_rate": 2.981655172413793e-05, "loss": 0.2096, "step": 7653 }, { "epoch": 26.393103448275863, "grad_norm": 1.7933008670806885, "learning_rate": 2.981609195402299e-05, "loss": 0.2324, "step": 7654 }, { "epoch": 26.396551724137932, "grad_norm": 0.851626455783844, "learning_rate": 2.9815632183908048e-05, "loss": 0.2486, "step": 7655 }, { "epoch": 26.4, "grad_norm": 1.00294828414917, "learning_rate": 2.9815172413793103e-05, "loss": 0.2392, "step": 7656 }, { "epoch": 26.40344827586207, "grad_norm": 1.8135483264923096, "learning_rate": 2.981471264367816e-05, "loss": 0.2256, "step": 7657 }, { "epoch": 26.406896551724138, "grad_norm": 2.0833585262298584, "learning_rate": 2.981425287356322e-05, "loss": 0.2302, "step": 7658 }, { "epoch": 26.410344827586208, "grad_norm": 1.2213447093963623, "learning_rate": 2.9813793103448276e-05, "loss": 0.253, "step": 7659 }, { "epoch": 26.413793103448278, "grad_norm": 1.3247343301773071, "learning_rate": 2.9813333333333335e-05, "loss": 0.2208, "step": 7660 }, { "epoch": 26.417241379310344, "grad_norm": 0.7266201972961426, "learning_rate": 2.981287356321839e-05, "loss": 0.2385, "step": 7661 }, { "epoch": 26.420689655172414, "grad_norm": 1.106294870376587, "learning_rate": 2.981241379310345e-05, "loss": 0.2518, "step": 7662 }, { "epoch": 26.424137931034483, "grad_norm": 1.0523020029067993, "learning_rate": 2.9811954022988507e-05, "loss": 0.2349, "step": 7663 }, { "epoch": 26.427586206896553, "grad_norm": 1.075510025024414, "learning_rate": 2.9811494252873563e-05, "loss": 0.265, "step": 7664 }, { "epoch": 26.43103448275862, "grad_norm": 1.2221542596817017, "learning_rate": 2.981103448275862e-05, "loss": 0.276, "step": 7665 }, { "epoch": 26.43448275862069, "grad_norm": 0.6477792263031006, "learning_rate": 2.981057471264368e-05, "loss": 0.313, "step": 7666 }, { "epoch": 26.43793103448276, "grad_norm": 1.7872997522354126, "learning_rate": 2.9810114942528735e-05, "loss": 0.2611, "step": 7667 }, { "epoch": 26.44137931034483, "grad_norm": 0.7384745478630066, "learning_rate": 2.9809655172413794e-05, "loss": 0.2938, "step": 7668 }, { "epoch": 26.444827586206898, "grad_norm": 0.556240975856781, "learning_rate": 2.980919540229885e-05, "loss": 0.2919, "step": 7669 }, { "epoch": 26.448275862068964, "grad_norm": 0.5812780857086182, "learning_rate": 2.980873563218391e-05, "loss": 0.2427, "step": 7670 }, { "epoch": 26.451724137931034, "grad_norm": 1.4980700016021729, "learning_rate": 2.9808275862068967e-05, "loss": 0.2314, "step": 7671 }, { "epoch": 26.455172413793104, "grad_norm": 3.937650680541992, "learning_rate": 2.9807816091954022e-05, "loss": 0.2647, "step": 7672 }, { "epoch": 26.458620689655174, "grad_norm": 0.7176464200019836, "learning_rate": 2.980735632183908e-05, "loss": 0.2663, "step": 7673 }, { "epoch": 26.46206896551724, "grad_norm": 0.8133523464202881, "learning_rate": 2.980689655172414e-05, "loss": 0.2198, "step": 7674 }, { "epoch": 26.46551724137931, "grad_norm": 0.5888031125068665, "learning_rate": 2.9806436781609198e-05, "loss": 0.2459, "step": 7675 }, { "epoch": 26.46896551724138, "grad_norm": 0.6993907690048218, "learning_rate": 2.9805977011494253e-05, "loss": 0.2127, "step": 7676 }, { "epoch": 26.47241379310345, "grad_norm": 0.6305184364318848, "learning_rate": 2.980551724137931e-05, "loss": 0.2276, "step": 7677 }, { "epoch": 26.47586206896552, "grad_norm": 0.6817377805709839, "learning_rate": 2.980505747126437e-05, "loss": 0.2184, "step": 7678 }, { "epoch": 26.479310344827585, "grad_norm": 0.7419320940971375, "learning_rate": 2.9804597701149426e-05, "loss": 0.2097, "step": 7679 }, { "epoch": 26.482758620689655, "grad_norm": 1.4760078191757202, "learning_rate": 2.9804137931034485e-05, "loss": 0.2514, "step": 7680 }, { "epoch": 26.486206896551725, "grad_norm": 1.84847092628479, "learning_rate": 2.980367816091954e-05, "loss": 0.2176, "step": 7681 }, { "epoch": 26.489655172413794, "grad_norm": 0.8575969934463501, "learning_rate": 2.98032183908046e-05, "loss": 0.2182, "step": 7682 }, { "epoch": 26.49310344827586, "grad_norm": 0.6625683903694153, "learning_rate": 2.9802758620689657e-05, "loss": 0.2287, "step": 7683 }, { "epoch": 26.49655172413793, "grad_norm": 0.7885624170303345, "learning_rate": 2.9802298850574713e-05, "loss": 0.2299, "step": 7684 }, { "epoch": 26.5, "grad_norm": 0.8431074619293213, "learning_rate": 2.9801839080459768e-05, "loss": 0.231, "step": 7685 }, { "epoch": 26.50344827586207, "grad_norm": 0.882188081741333, "learning_rate": 2.980137931034483e-05, "loss": 0.2283, "step": 7686 }, { "epoch": 26.50689655172414, "grad_norm": 1.2810734510421753, "learning_rate": 2.9800919540229885e-05, "loss": 0.2224, "step": 7687 }, { "epoch": 26.510344827586206, "grad_norm": 1.242397665977478, "learning_rate": 2.9800459770114944e-05, "loss": 0.2275, "step": 7688 }, { "epoch": 26.513793103448275, "grad_norm": 1.8837586641311646, "learning_rate": 2.98e-05, "loss": 0.2726, "step": 7689 }, { "epoch": 26.517241379310345, "grad_norm": 2.1632041931152344, "learning_rate": 2.9799540229885058e-05, "loss": 0.3217, "step": 7690 }, { "epoch": 26.520689655172415, "grad_norm": 0.7495764493942261, "learning_rate": 2.9799080459770117e-05, "loss": 0.2924, "step": 7691 }, { "epoch": 26.52413793103448, "grad_norm": 0.9247620105743408, "learning_rate": 2.9798620689655172e-05, "loss": 0.2511, "step": 7692 }, { "epoch": 26.52758620689655, "grad_norm": 0.775790274143219, "learning_rate": 2.979816091954023e-05, "loss": 0.2796, "step": 7693 }, { "epoch": 26.53103448275862, "grad_norm": 0.6125720143318176, "learning_rate": 2.979770114942529e-05, "loss": 0.2427, "step": 7694 }, { "epoch": 26.53448275862069, "grad_norm": 0.5711391568183899, "learning_rate": 2.9797241379310345e-05, "loss": 0.2469, "step": 7695 }, { "epoch": 26.53793103448276, "grad_norm": 1.8940386772155762, "learning_rate": 2.9796781609195403e-05, "loss": 0.226, "step": 7696 }, { "epoch": 26.541379310344826, "grad_norm": 2.806086301803589, "learning_rate": 2.979632183908046e-05, "loss": 0.2447, "step": 7697 }, { "epoch": 26.544827586206896, "grad_norm": 0.8331431150436401, "learning_rate": 2.979586206896552e-05, "loss": 0.2576, "step": 7698 }, { "epoch": 26.548275862068966, "grad_norm": 0.6197203993797302, "learning_rate": 2.9795402298850576e-05, "loss": 0.2235, "step": 7699 }, { "epoch": 26.551724137931036, "grad_norm": 0.7540431022644043, "learning_rate": 2.979494252873563e-05, "loss": 0.2189, "step": 7700 }, { "epoch": 26.555172413793102, "grad_norm": 0.7595137357711792, "learning_rate": 2.979448275862069e-05, "loss": 0.2589, "step": 7701 }, { "epoch": 26.55862068965517, "grad_norm": 1.0461146831512451, "learning_rate": 2.979402298850575e-05, "loss": 0.2505, "step": 7702 }, { "epoch": 26.56206896551724, "grad_norm": 0.6385428309440613, "learning_rate": 2.9793563218390807e-05, "loss": 0.1958, "step": 7703 }, { "epoch": 26.56551724137931, "grad_norm": 1.2879267930984497, "learning_rate": 2.9793103448275863e-05, "loss": 0.2391, "step": 7704 }, { "epoch": 26.56896551724138, "grad_norm": 0.6004958748817444, "learning_rate": 2.9792643678160918e-05, "loss": 0.2267, "step": 7705 }, { "epoch": 26.572413793103447, "grad_norm": 0.8363475203514099, "learning_rate": 2.979218390804598e-05, "loss": 0.2386, "step": 7706 }, { "epoch": 26.575862068965517, "grad_norm": 1.7155301570892334, "learning_rate": 2.9791724137931035e-05, "loss": 0.2368, "step": 7707 }, { "epoch": 26.579310344827586, "grad_norm": 1.8717701435089111, "learning_rate": 2.9791264367816094e-05, "loss": 0.2479, "step": 7708 }, { "epoch": 26.582758620689656, "grad_norm": 0.9793787002563477, "learning_rate": 2.979080459770115e-05, "loss": 0.2075, "step": 7709 }, { "epoch": 26.586206896551722, "grad_norm": 2.148188591003418, "learning_rate": 2.9790344827586208e-05, "loss": 0.2373, "step": 7710 }, { "epoch": 26.589655172413792, "grad_norm": 0.9488980174064636, "learning_rate": 2.9789885057471267e-05, "loss": 0.2497, "step": 7711 }, { "epoch": 26.593103448275862, "grad_norm": 1.9919216632843018, "learning_rate": 2.9789425287356322e-05, "loss": 0.2232, "step": 7712 }, { "epoch": 26.59655172413793, "grad_norm": 9.155431747436523, "learning_rate": 2.9788965517241377e-05, "loss": 0.2341, "step": 7713 }, { "epoch": 26.6, "grad_norm": 1.9990838766098022, "learning_rate": 2.978850574712644e-05, "loss": 0.2365, "step": 7714 }, { "epoch": 26.603448275862068, "grad_norm": 4.079277992248535, "learning_rate": 2.9788045977011495e-05, "loss": 0.3452, "step": 7715 }, { "epoch": 26.606896551724137, "grad_norm": 1.0137556791305542, "learning_rate": 2.9787586206896553e-05, "loss": 0.3334, "step": 7716 }, { "epoch": 26.610344827586207, "grad_norm": 0.4551432132720947, "learning_rate": 2.978712643678161e-05, "loss": 0.2576, "step": 7717 }, { "epoch": 26.613793103448277, "grad_norm": 0.7191035747528076, "learning_rate": 2.9786666666666667e-05, "loss": 0.2578, "step": 7718 }, { "epoch": 26.617241379310343, "grad_norm": 0.570042610168457, "learning_rate": 2.9786206896551726e-05, "loss": 0.2397, "step": 7719 }, { "epoch": 26.620689655172413, "grad_norm": 1.5133856534957886, "learning_rate": 2.978574712643678e-05, "loss": 0.2683, "step": 7720 }, { "epoch": 26.624137931034483, "grad_norm": 0.6123391389846802, "learning_rate": 2.978528735632184e-05, "loss": 0.241, "step": 7721 }, { "epoch": 26.627586206896552, "grad_norm": 0.7117083072662354, "learning_rate": 2.97848275862069e-05, "loss": 0.272, "step": 7722 }, { "epoch": 26.631034482758622, "grad_norm": 1.107370138168335, "learning_rate": 2.9784367816091954e-05, "loss": 0.2268, "step": 7723 }, { "epoch": 26.63448275862069, "grad_norm": 0.7010558843612671, "learning_rate": 2.9783908045977013e-05, "loss": 0.2396, "step": 7724 }, { "epoch": 26.637931034482758, "grad_norm": 4.107812881469727, "learning_rate": 2.9783448275862068e-05, "loss": 0.2042, "step": 7725 }, { "epoch": 26.641379310344828, "grad_norm": 0.788571834564209, "learning_rate": 2.978298850574713e-05, "loss": 0.241, "step": 7726 }, { "epoch": 26.644827586206898, "grad_norm": 0.6937601566314697, "learning_rate": 2.9782528735632185e-05, "loss": 0.2392, "step": 7727 }, { "epoch": 26.648275862068964, "grad_norm": 1.2078789472579956, "learning_rate": 2.978206896551724e-05, "loss": 0.2339, "step": 7728 }, { "epoch": 26.651724137931033, "grad_norm": 0.5507736206054688, "learning_rate": 2.97816091954023e-05, "loss": 0.2184, "step": 7729 }, { "epoch": 26.655172413793103, "grad_norm": 2.2527565956115723, "learning_rate": 2.9781149425287358e-05, "loss": 0.2152, "step": 7730 }, { "epoch": 26.658620689655173, "grad_norm": 0.8032318353652954, "learning_rate": 2.9780689655172417e-05, "loss": 0.2531, "step": 7731 }, { "epoch": 26.662068965517243, "grad_norm": 1.105934739112854, "learning_rate": 2.9780229885057472e-05, "loss": 0.2422, "step": 7732 }, { "epoch": 26.66551724137931, "grad_norm": 0.872360348701477, "learning_rate": 2.9779770114942527e-05, "loss": 0.2264, "step": 7733 }, { "epoch": 26.66896551724138, "grad_norm": 1.8587836027145386, "learning_rate": 2.977931034482759e-05, "loss": 0.2309, "step": 7734 }, { "epoch": 26.67241379310345, "grad_norm": 1.0759955644607544, "learning_rate": 2.9778850574712644e-05, "loss": 0.2384, "step": 7735 }, { "epoch": 26.675862068965518, "grad_norm": 1.0669924020767212, "learning_rate": 2.9778390804597703e-05, "loss": 0.2324, "step": 7736 }, { "epoch": 26.679310344827588, "grad_norm": 1.0300849676132202, "learning_rate": 2.977793103448276e-05, "loss": 0.2012, "step": 7737 }, { "epoch": 26.682758620689654, "grad_norm": 1.573873519897461, "learning_rate": 2.9777471264367817e-05, "loss": 0.2323, "step": 7738 }, { "epoch": 26.686206896551724, "grad_norm": 1.0834933519363403, "learning_rate": 2.9777011494252876e-05, "loss": 0.2552, "step": 7739 }, { "epoch": 26.689655172413794, "grad_norm": 1.4321492910385132, "learning_rate": 2.977655172413793e-05, "loss": 0.3274, "step": 7740 }, { "epoch": 26.693103448275863, "grad_norm": 1.4193236827850342, "learning_rate": 2.9776091954022986e-05, "loss": 0.2828, "step": 7741 }, { "epoch": 26.69655172413793, "grad_norm": 0.7215870022773743, "learning_rate": 2.977563218390805e-05, "loss": 0.2555, "step": 7742 }, { "epoch": 26.7, "grad_norm": 0.8780179619789124, "learning_rate": 2.9775172413793104e-05, "loss": 0.2726, "step": 7743 }, { "epoch": 26.70344827586207, "grad_norm": 1.1450932025909424, "learning_rate": 2.9774712643678162e-05, "loss": 0.2657, "step": 7744 }, { "epoch": 26.70689655172414, "grad_norm": 2.159853458404541, "learning_rate": 2.9774252873563218e-05, "loss": 0.2589, "step": 7745 }, { "epoch": 26.71034482758621, "grad_norm": 0.9480910897254944, "learning_rate": 2.9773793103448276e-05, "loss": 0.2358, "step": 7746 }, { "epoch": 26.713793103448275, "grad_norm": 2.266231060028076, "learning_rate": 2.9773333333333335e-05, "loss": 0.2789, "step": 7747 }, { "epoch": 26.717241379310344, "grad_norm": 1.286378264427185, "learning_rate": 2.977287356321839e-05, "loss": 0.2398, "step": 7748 }, { "epoch": 26.720689655172414, "grad_norm": 0.8296560645103455, "learning_rate": 2.977241379310345e-05, "loss": 0.2346, "step": 7749 }, { "epoch": 26.724137931034484, "grad_norm": 0.7364253997802734, "learning_rate": 2.9771954022988508e-05, "loss": 0.2251, "step": 7750 }, { "epoch": 26.72758620689655, "grad_norm": 0.5849937796592712, "learning_rate": 2.9771494252873563e-05, "loss": 0.2122, "step": 7751 }, { "epoch": 26.73103448275862, "grad_norm": 0.7854217290878296, "learning_rate": 2.9771034482758622e-05, "loss": 0.2845, "step": 7752 }, { "epoch": 26.73448275862069, "grad_norm": 0.8715479969978333, "learning_rate": 2.9770574712643677e-05, "loss": 0.2361, "step": 7753 }, { "epoch": 26.73793103448276, "grad_norm": 1.1669756174087524, "learning_rate": 2.977011494252874e-05, "loss": 0.2325, "step": 7754 }, { "epoch": 26.74137931034483, "grad_norm": 1.733185052871704, "learning_rate": 2.9769655172413794e-05, "loss": 0.2205, "step": 7755 }, { "epoch": 26.744827586206895, "grad_norm": 1.2190784215927124, "learning_rate": 2.976919540229885e-05, "loss": 0.2213, "step": 7756 }, { "epoch": 26.748275862068965, "grad_norm": 0.7832764983177185, "learning_rate": 2.976873563218391e-05, "loss": 0.2023, "step": 7757 }, { "epoch": 26.751724137931035, "grad_norm": 0.6900325417518616, "learning_rate": 2.9768275862068967e-05, "loss": 0.2193, "step": 7758 }, { "epoch": 26.755172413793105, "grad_norm": 0.8778819441795349, "learning_rate": 2.9767816091954026e-05, "loss": 0.2266, "step": 7759 }, { "epoch": 26.75862068965517, "grad_norm": 0.9504672288894653, "learning_rate": 2.976735632183908e-05, "loss": 0.2406, "step": 7760 }, { "epoch": 26.76206896551724, "grad_norm": 1.1446834802627563, "learning_rate": 2.9766896551724136e-05, "loss": 0.2195, "step": 7761 }, { "epoch": 26.76551724137931, "grad_norm": 1.3391151428222656, "learning_rate": 2.97664367816092e-05, "loss": 0.1927, "step": 7762 }, { "epoch": 26.76896551724138, "grad_norm": 1.179391860961914, "learning_rate": 2.9765977011494254e-05, "loss": 0.2376, "step": 7763 }, { "epoch": 26.77241379310345, "grad_norm": 1.0322009325027466, "learning_rate": 2.9765517241379312e-05, "loss": 0.2262, "step": 7764 }, { "epoch": 26.775862068965516, "grad_norm": 1.5980702638626099, "learning_rate": 2.9765057471264368e-05, "loss": 0.326, "step": 7765 }, { "epoch": 26.779310344827586, "grad_norm": 0.7293186783790588, "learning_rate": 2.9764597701149426e-05, "loss": 0.2942, "step": 7766 }, { "epoch": 26.782758620689656, "grad_norm": 1.604705810546875, "learning_rate": 2.9764137931034485e-05, "loss": 0.2727, "step": 7767 }, { "epoch": 26.786206896551725, "grad_norm": 1.1060764789581299, "learning_rate": 2.976367816091954e-05, "loss": 0.249, "step": 7768 }, { "epoch": 26.78965517241379, "grad_norm": 0.8959229588508606, "learning_rate": 2.97632183908046e-05, "loss": 0.2522, "step": 7769 }, { "epoch": 26.79310344827586, "grad_norm": 0.6743690967559814, "learning_rate": 2.9762758620689658e-05, "loss": 0.2741, "step": 7770 }, { "epoch": 26.79655172413793, "grad_norm": 0.9511473774909973, "learning_rate": 2.9762298850574713e-05, "loss": 0.2668, "step": 7771 }, { "epoch": 26.8, "grad_norm": 0.8662059903144836, "learning_rate": 2.9761839080459772e-05, "loss": 0.2459, "step": 7772 }, { "epoch": 26.80344827586207, "grad_norm": 1.2189372777938843, "learning_rate": 2.9761379310344827e-05, "loss": 0.2578, "step": 7773 }, { "epoch": 26.806896551724137, "grad_norm": 0.8585258722305298, "learning_rate": 2.9760919540229886e-05, "loss": 0.2317, "step": 7774 }, { "epoch": 26.810344827586206, "grad_norm": 0.6791765689849854, "learning_rate": 2.9760459770114944e-05, "loss": 0.2465, "step": 7775 }, { "epoch": 26.813793103448276, "grad_norm": 0.9211806654930115, "learning_rate": 2.976e-05, "loss": 0.2756, "step": 7776 }, { "epoch": 26.817241379310346, "grad_norm": 0.6648785471916199, "learning_rate": 2.975954022988506e-05, "loss": 0.2427, "step": 7777 }, { "epoch": 26.820689655172412, "grad_norm": 1.759674310684204, "learning_rate": 2.9759080459770117e-05, "loss": 0.2245, "step": 7778 }, { "epoch": 26.824137931034482, "grad_norm": 1.1260647773742676, "learning_rate": 2.9758620689655172e-05, "loss": 0.2452, "step": 7779 }, { "epoch": 26.82758620689655, "grad_norm": 0.9551091194152832, "learning_rate": 2.975816091954023e-05, "loss": 0.2293, "step": 7780 }, { "epoch": 26.83103448275862, "grad_norm": 0.7731865048408508, "learning_rate": 2.9757701149425286e-05, "loss": 0.2175, "step": 7781 }, { "epoch": 26.83448275862069, "grad_norm": 1.3498717546463013, "learning_rate": 2.975724137931035e-05, "loss": 0.235, "step": 7782 }, { "epoch": 26.837931034482757, "grad_norm": 0.9011282324790955, "learning_rate": 2.9756781609195404e-05, "loss": 0.2269, "step": 7783 }, { "epoch": 26.841379310344827, "grad_norm": 0.728153645992279, "learning_rate": 2.975632183908046e-05, "loss": 0.2181, "step": 7784 }, { "epoch": 26.844827586206897, "grad_norm": 0.9273173213005066, "learning_rate": 2.9755862068965518e-05, "loss": 0.2176, "step": 7785 }, { "epoch": 26.848275862068967, "grad_norm": 2.4302353858947754, "learning_rate": 2.9755402298850576e-05, "loss": 0.2218, "step": 7786 }, { "epoch": 26.851724137931036, "grad_norm": 0.7349728345870972, "learning_rate": 2.9754942528735635e-05, "loss": 0.2409, "step": 7787 }, { "epoch": 26.855172413793102, "grad_norm": 0.9065766334533691, "learning_rate": 2.975448275862069e-05, "loss": 0.2317, "step": 7788 }, { "epoch": 26.858620689655172, "grad_norm": 1.6966965198516846, "learning_rate": 2.9754022988505746e-05, "loss": 0.2881, "step": 7789 }, { "epoch": 26.862068965517242, "grad_norm": 1.6190645694732666, "learning_rate": 2.9753563218390808e-05, "loss": 0.3444, "step": 7790 }, { "epoch": 26.86551724137931, "grad_norm": 11.560129165649414, "learning_rate": 2.9753103448275863e-05, "loss": 0.3106, "step": 7791 }, { "epoch": 26.868965517241378, "grad_norm": 0.716063380241394, "learning_rate": 2.975264367816092e-05, "loss": 0.2754, "step": 7792 }, { "epoch": 26.872413793103448, "grad_norm": 0.9228999614715576, "learning_rate": 2.9752183908045977e-05, "loss": 0.2604, "step": 7793 }, { "epoch": 26.875862068965517, "grad_norm": 0.6774492859840393, "learning_rate": 2.9751724137931036e-05, "loss": 0.2582, "step": 7794 }, { "epoch": 26.879310344827587, "grad_norm": 2.808837890625, "learning_rate": 2.9751264367816094e-05, "loss": 0.2439, "step": 7795 }, { "epoch": 26.882758620689657, "grad_norm": 0.6881598234176636, "learning_rate": 2.975080459770115e-05, "loss": 0.2601, "step": 7796 }, { "epoch": 26.886206896551723, "grad_norm": 0.6669313907623291, "learning_rate": 2.9750344827586208e-05, "loss": 0.253, "step": 7797 }, { "epoch": 26.889655172413793, "grad_norm": 0.8753637075424194, "learning_rate": 2.9749885057471267e-05, "loss": 0.2545, "step": 7798 }, { "epoch": 26.893103448275863, "grad_norm": 0.6520353555679321, "learning_rate": 2.9749425287356322e-05, "loss": 0.2451, "step": 7799 }, { "epoch": 26.896551724137932, "grad_norm": 1.2508599758148193, "learning_rate": 2.974896551724138e-05, "loss": 0.2076, "step": 7800 }, { "epoch": 26.9, "grad_norm": 0.6372467279434204, "learning_rate": 2.9748505747126436e-05, "loss": 0.2262, "step": 7801 }, { "epoch": 26.90344827586207, "grad_norm": 0.8142106533050537, "learning_rate": 2.9748045977011495e-05, "loss": 0.2045, "step": 7802 }, { "epoch": 26.906896551724138, "grad_norm": 0.9703889489173889, "learning_rate": 2.9747586206896554e-05, "loss": 0.2696, "step": 7803 }, { "epoch": 26.910344827586208, "grad_norm": 0.8535445332527161, "learning_rate": 2.974712643678161e-05, "loss": 0.2632, "step": 7804 }, { "epoch": 26.913793103448278, "grad_norm": 2.3335189819335938, "learning_rate": 2.9746666666666668e-05, "loss": 0.2961, "step": 7805 }, { "epoch": 26.917241379310344, "grad_norm": 0.636120617389679, "learning_rate": 2.9746206896551726e-05, "loss": 0.207, "step": 7806 }, { "epoch": 26.920689655172414, "grad_norm": 0.5987874269485474, "learning_rate": 2.974574712643678e-05, "loss": 0.2247, "step": 7807 }, { "epoch": 26.924137931034483, "grad_norm": 0.8599744439125061, "learning_rate": 2.974528735632184e-05, "loss": 0.2099, "step": 7808 }, { "epoch": 26.927586206896553, "grad_norm": 2.6717023849487305, "learning_rate": 2.9744827586206896e-05, "loss": 0.2191, "step": 7809 }, { "epoch": 26.93103448275862, "grad_norm": 1.718634843826294, "learning_rate": 2.9744367816091958e-05, "loss": 0.2461, "step": 7810 }, { "epoch": 26.93448275862069, "grad_norm": 3.511272430419922, "learning_rate": 2.9743908045977013e-05, "loss": 0.2615, "step": 7811 }, { "epoch": 26.93793103448276, "grad_norm": 1.044217586517334, "learning_rate": 2.9743448275862068e-05, "loss": 0.222, "step": 7812 }, { "epoch": 26.94137931034483, "grad_norm": 2.6990652084350586, "learning_rate": 2.9742988505747127e-05, "loss": 0.1926, "step": 7813 }, { "epoch": 26.944827586206898, "grad_norm": 1.1463943719863892, "learning_rate": 2.9742528735632186e-05, "loss": 0.28, "step": 7814 }, { "epoch": 26.948275862068964, "grad_norm": 1.4683082103729248, "learning_rate": 2.9742068965517244e-05, "loss": 0.3353, "step": 7815 }, { "epoch": 26.951724137931034, "grad_norm": 1.04561448097229, "learning_rate": 2.97416091954023e-05, "loss": 0.2832, "step": 7816 }, { "epoch": 26.955172413793104, "grad_norm": 1.421687364578247, "learning_rate": 2.9741149425287355e-05, "loss": 0.2627, "step": 7817 }, { "epoch": 26.958620689655174, "grad_norm": 0.8459551930427551, "learning_rate": 2.9740689655172417e-05, "loss": 0.2445, "step": 7818 }, { "epoch": 26.96206896551724, "grad_norm": 0.6345975399017334, "learning_rate": 2.9740229885057472e-05, "loss": 0.243, "step": 7819 }, { "epoch": 26.96551724137931, "grad_norm": 1.1516923904418945, "learning_rate": 2.973977011494253e-05, "loss": 0.2273, "step": 7820 }, { "epoch": 26.96896551724138, "grad_norm": 0.7142924666404724, "learning_rate": 2.9739310344827586e-05, "loss": 0.2209, "step": 7821 }, { "epoch": 26.97241379310345, "grad_norm": 0.5143298506736755, "learning_rate": 2.9738850574712645e-05, "loss": 0.2277, "step": 7822 }, { "epoch": 26.97586206896552, "grad_norm": 0.7581377625465393, "learning_rate": 2.9738390804597704e-05, "loss": 0.2287, "step": 7823 }, { "epoch": 26.979310344827585, "grad_norm": 0.5139353275299072, "learning_rate": 2.973793103448276e-05, "loss": 0.2345, "step": 7824 }, { "epoch": 26.982758620689655, "grad_norm": 0.86783367395401, "learning_rate": 2.9737471264367818e-05, "loss": 0.2318, "step": 7825 }, { "epoch": 26.986206896551725, "grad_norm": 0.8700705170631409, "learning_rate": 2.9737011494252876e-05, "loss": 0.2316, "step": 7826 }, { "epoch": 26.989655172413794, "grad_norm": 1.2688772678375244, "learning_rate": 2.973655172413793e-05, "loss": 0.2635, "step": 7827 }, { "epoch": 26.99310344827586, "grad_norm": 1.9334579706192017, "learning_rate": 2.973609195402299e-05, "loss": 0.2192, "step": 7828 }, { "epoch": 26.99655172413793, "grad_norm": 1.7092812061309814, "learning_rate": 2.9735632183908045e-05, "loss": 0.2764, "step": 7829 }, { "epoch": 27.0, "grad_norm": 1.2972509860992432, "learning_rate": 2.9735172413793104e-05, "loss": 0.3218, "step": 7830 }, { "epoch": 27.00344827586207, "grad_norm": 0.7008881568908691, "learning_rate": 2.9734712643678163e-05, "loss": 0.288, "step": 7831 }, { "epoch": 27.00689655172414, "grad_norm": 1.5402048826217651, "learning_rate": 2.9734252873563218e-05, "loss": 0.264, "step": 7832 }, { "epoch": 27.010344827586206, "grad_norm": 0.8760619759559631, "learning_rate": 2.9733793103448277e-05, "loss": 0.2541, "step": 7833 }, { "epoch": 27.013793103448275, "grad_norm": 0.5231070518493652, "learning_rate": 2.9733333333333336e-05, "loss": 0.2412, "step": 7834 }, { "epoch": 27.017241379310345, "grad_norm": 0.7853559851646423, "learning_rate": 2.973287356321839e-05, "loss": 0.2623, "step": 7835 }, { "epoch": 27.020689655172415, "grad_norm": 1.0242762565612793, "learning_rate": 2.973241379310345e-05, "loss": 0.2361, "step": 7836 }, { "epoch": 27.02413793103448, "grad_norm": 0.5808387398719788, "learning_rate": 2.9731954022988505e-05, "loss": 0.2474, "step": 7837 }, { "epoch": 27.02758620689655, "grad_norm": 0.7261859178543091, "learning_rate": 2.9731494252873567e-05, "loss": 0.2429, "step": 7838 }, { "epoch": 27.03103448275862, "grad_norm": 0.48232197761535645, "learning_rate": 2.9731034482758622e-05, "loss": 0.2351, "step": 7839 }, { "epoch": 27.03448275862069, "grad_norm": 1.1672170162200928, "learning_rate": 2.9730574712643677e-05, "loss": 0.2351, "step": 7840 }, { "epoch": 27.03793103448276, "grad_norm": 1.0847972631454468, "learning_rate": 2.9730114942528736e-05, "loss": 0.2447, "step": 7841 }, { "epoch": 27.041379310344826, "grad_norm": 0.5179333090782166, "learning_rate": 2.9729655172413795e-05, "loss": 0.2302, "step": 7842 }, { "epoch": 27.044827586206896, "grad_norm": 1.8996340036392212, "learning_rate": 2.9729195402298853e-05, "loss": 0.2292, "step": 7843 }, { "epoch": 27.048275862068966, "grad_norm": 0.7947349548339844, "learning_rate": 2.972873563218391e-05, "loss": 0.2173, "step": 7844 }, { "epoch": 27.051724137931036, "grad_norm": 1.1006096601486206, "learning_rate": 2.9728275862068964e-05, "loss": 0.2127, "step": 7845 }, { "epoch": 27.055172413793102, "grad_norm": 1.3458606004714966, "learning_rate": 2.9727816091954026e-05, "loss": 0.2462, "step": 7846 }, { "epoch": 27.05862068965517, "grad_norm": 1.2563185691833496, "learning_rate": 2.972735632183908e-05, "loss": 0.2274, "step": 7847 }, { "epoch": 27.06206896551724, "grad_norm": 0.9550162553787231, "learning_rate": 2.972689655172414e-05, "loss": 0.2091, "step": 7848 }, { "epoch": 27.06551724137931, "grad_norm": 2.924751043319702, "learning_rate": 2.9726436781609195e-05, "loss": 0.2555, "step": 7849 }, { "epoch": 27.06896551724138, "grad_norm": 0.7546530961990356, "learning_rate": 2.9725977011494254e-05, "loss": 0.2159, "step": 7850 }, { "epoch": 27.072413793103447, "grad_norm": 0.8617950081825256, "learning_rate": 2.9725517241379313e-05, "loss": 0.2237, "step": 7851 }, { "epoch": 27.075862068965517, "grad_norm": 0.9151374697685242, "learning_rate": 2.9725057471264368e-05, "loss": 0.1948, "step": 7852 }, { "epoch": 27.079310344827586, "grad_norm": 1.6637616157531738, "learning_rate": 2.9724597701149427e-05, "loss": 0.2385, "step": 7853 }, { "epoch": 27.082758620689656, "grad_norm": 1.9351460933685303, "learning_rate": 2.9724137931034482e-05, "loss": 0.251, "step": 7854 }, { "epoch": 27.086206896551722, "grad_norm": 1.4743613004684448, "learning_rate": 2.972367816091954e-05, "loss": 0.2939, "step": 7855 }, { "epoch": 27.089655172413792, "grad_norm": 1.027991533279419, "learning_rate": 2.97232183908046e-05, "loss": 0.3228, "step": 7856 }, { "epoch": 27.093103448275862, "grad_norm": 1.058722734451294, "learning_rate": 2.9722758620689655e-05, "loss": 0.2362, "step": 7857 }, { "epoch": 27.09655172413793, "grad_norm": 0.8117353320121765, "learning_rate": 2.9722298850574713e-05, "loss": 0.2433, "step": 7858 }, { "epoch": 27.1, "grad_norm": 0.6133403182029724, "learning_rate": 2.9721839080459772e-05, "loss": 0.2297, "step": 7859 }, { "epoch": 27.103448275862068, "grad_norm": 0.5746435523033142, "learning_rate": 2.9721379310344827e-05, "loss": 0.2433, "step": 7860 }, { "epoch": 27.106896551724137, "grad_norm": 0.5618568062782288, "learning_rate": 2.9720919540229886e-05, "loss": 0.2717, "step": 7861 }, { "epoch": 27.110344827586207, "grad_norm": 1.0112266540527344, "learning_rate": 2.972045977011494e-05, "loss": 0.2512, "step": 7862 }, { "epoch": 27.113793103448277, "grad_norm": 2.0787065029144287, "learning_rate": 2.972e-05, "loss": 0.2763, "step": 7863 }, { "epoch": 27.117241379310343, "grad_norm": 0.7685479521751404, "learning_rate": 2.971954022988506e-05, "loss": 0.2378, "step": 7864 }, { "epoch": 27.120689655172413, "grad_norm": 2.2309489250183105, "learning_rate": 2.9719080459770114e-05, "loss": 0.2321, "step": 7865 }, { "epoch": 27.124137931034483, "grad_norm": 0.83191978931427, "learning_rate": 2.9718620689655173e-05, "loss": 0.2093, "step": 7866 }, { "epoch": 27.127586206896552, "grad_norm": 0.8829671144485474, "learning_rate": 2.971816091954023e-05, "loss": 0.2321, "step": 7867 }, { "epoch": 27.131034482758622, "grad_norm": 0.6778651475906372, "learning_rate": 2.9717701149425287e-05, "loss": 0.2534, "step": 7868 }, { "epoch": 27.13448275862069, "grad_norm": 0.615505576133728, "learning_rate": 2.9717241379310345e-05, "loss": 0.2198, "step": 7869 }, { "epoch": 27.137931034482758, "grad_norm": 0.8423727750778198, "learning_rate": 2.97167816091954e-05, "loss": 0.2303, "step": 7870 }, { "epoch": 27.141379310344828, "grad_norm": 0.6092727184295654, "learning_rate": 2.9716321839080463e-05, "loss": 0.2037, "step": 7871 }, { "epoch": 27.144827586206898, "grad_norm": 0.6216221451759338, "learning_rate": 2.9715862068965518e-05, "loss": 0.2099, "step": 7872 }, { "epoch": 27.148275862068967, "grad_norm": 3.2338809967041016, "learning_rate": 2.9715402298850573e-05, "loss": 0.2237, "step": 7873 }, { "epoch": 27.151724137931033, "grad_norm": 0.6783102750778198, "learning_rate": 2.9714942528735632e-05, "loss": 0.2336, "step": 7874 }, { "epoch": 27.155172413793103, "grad_norm": 4.051265716552734, "learning_rate": 2.971448275862069e-05, "loss": 0.2165, "step": 7875 }, { "epoch": 27.158620689655173, "grad_norm": 0.7982636094093323, "learning_rate": 2.971402298850575e-05, "loss": 0.2329, "step": 7876 }, { "epoch": 27.162068965517243, "grad_norm": 0.9513654708862305, "learning_rate": 2.9713563218390805e-05, "loss": 0.2145, "step": 7877 }, { "epoch": 27.16551724137931, "grad_norm": 0.86795973777771, "learning_rate": 2.971310344827586e-05, "loss": 0.2297, "step": 7878 }, { "epoch": 27.16896551724138, "grad_norm": 1.6813751459121704, "learning_rate": 2.9712643678160922e-05, "loss": 0.2438, "step": 7879 }, { "epoch": 27.17241379310345, "grad_norm": 0.9584718346595764, "learning_rate": 2.9712183908045977e-05, "loss": 0.2686, "step": 7880 }, { "epoch": 27.175862068965518, "grad_norm": 0.8891062140464783, "learning_rate": 2.9711724137931036e-05, "loss": 0.2602, "step": 7881 }, { "epoch": 27.179310344827588, "grad_norm": 0.6089549660682678, "learning_rate": 2.971126436781609e-05, "loss": 0.2334, "step": 7882 }, { "epoch": 27.182758620689654, "grad_norm": 1.4655017852783203, "learning_rate": 2.971080459770115e-05, "loss": 0.2335, "step": 7883 }, { "epoch": 27.186206896551724, "grad_norm": 1.466386318206787, "learning_rate": 2.971034482758621e-05, "loss": 0.2185, "step": 7884 }, { "epoch": 27.189655172413794, "grad_norm": 0.6252477169036865, "learning_rate": 2.9709885057471264e-05, "loss": 0.2495, "step": 7885 }, { "epoch": 27.193103448275863, "grad_norm": 0.6459823250770569, "learning_rate": 2.9709425287356323e-05, "loss": 0.2257, "step": 7886 }, { "epoch": 27.19655172413793, "grad_norm": 0.8552842140197754, "learning_rate": 2.970896551724138e-05, "loss": 0.2291, "step": 7887 }, { "epoch": 27.2, "grad_norm": 0.707400381565094, "learning_rate": 2.9708505747126437e-05, "loss": 0.2448, "step": 7888 }, { "epoch": 27.20344827586207, "grad_norm": 1.1809635162353516, "learning_rate": 2.9708045977011495e-05, "loss": 0.2192, "step": 7889 }, { "epoch": 27.20689655172414, "grad_norm": 1.173307180404663, "learning_rate": 2.970758620689655e-05, "loss": 0.2259, "step": 7890 }, { "epoch": 27.21034482758621, "grad_norm": 0.7979686260223389, "learning_rate": 2.970712643678161e-05, "loss": 0.2113, "step": 7891 }, { "epoch": 27.213793103448275, "grad_norm": 0.7815075516700745, "learning_rate": 2.9706666666666668e-05, "loss": 0.2106, "step": 7892 }, { "epoch": 27.217241379310344, "grad_norm": 0.9343141317367554, "learning_rate": 2.9706206896551723e-05, "loss": 0.2203, "step": 7893 }, { "epoch": 27.220689655172414, "grad_norm": 0.8019468784332275, "learning_rate": 2.9705747126436782e-05, "loss": 0.2095, "step": 7894 }, { "epoch": 27.224137931034484, "grad_norm": 0.9010213613510132, "learning_rate": 2.970528735632184e-05, "loss": 0.2231, "step": 7895 }, { "epoch": 27.22758620689655, "grad_norm": 1.1213492155075073, "learning_rate": 2.9704827586206896e-05, "loss": 0.2105, "step": 7896 }, { "epoch": 27.23103448275862, "grad_norm": 0.7184605002403259, "learning_rate": 2.9704367816091955e-05, "loss": 0.212, "step": 7897 }, { "epoch": 27.23448275862069, "grad_norm": 1.0105363130569458, "learning_rate": 2.970390804597701e-05, "loss": 0.1974, "step": 7898 }, { "epoch": 27.23793103448276, "grad_norm": 0.680823564529419, "learning_rate": 2.9703448275862072e-05, "loss": 0.2126, "step": 7899 }, { "epoch": 27.24137931034483, "grad_norm": 1.058469533920288, "learning_rate": 2.9702988505747127e-05, "loss": 0.1933, "step": 7900 }, { "epoch": 27.244827586206895, "grad_norm": 1.8035274744033813, "learning_rate": 2.9702528735632183e-05, "loss": 0.2312, "step": 7901 }, { "epoch": 27.248275862068965, "grad_norm": 1.5079659223556519, "learning_rate": 2.970206896551724e-05, "loss": 0.2112, "step": 7902 }, { "epoch": 27.251724137931035, "grad_norm": 1.5274147987365723, "learning_rate": 2.97016091954023e-05, "loss": 0.1952, "step": 7903 }, { "epoch": 27.255172413793105, "grad_norm": 1.035022497177124, "learning_rate": 2.970114942528736e-05, "loss": 0.2584, "step": 7904 }, { "epoch": 27.25862068965517, "grad_norm": 2.2302794456481934, "learning_rate": 2.9700689655172414e-05, "loss": 0.2825, "step": 7905 }, { "epoch": 27.26206896551724, "grad_norm": 0.5276262760162354, "learning_rate": 2.970022988505747e-05, "loss": 0.2785, "step": 7906 }, { "epoch": 27.26551724137931, "grad_norm": 1.2700682878494263, "learning_rate": 2.969977011494253e-05, "loss": 0.2738, "step": 7907 }, { "epoch": 27.26896551724138, "grad_norm": 0.5101035237312317, "learning_rate": 2.9699310344827587e-05, "loss": 0.2759, "step": 7908 }, { "epoch": 27.27241379310345, "grad_norm": 0.7236302495002747, "learning_rate": 2.9698850574712645e-05, "loss": 0.2488, "step": 7909 }, { "epoch": 27.275862068965516, "grad_norm": 0.6984092593193054, "learning_rate": 2.96983908045977e-05, "loss": 0.2333, "step": 7910 }, { "epoch": 27.279310344827586, "grad_norm": 0.5963259935379028, "learning_rate": 2.969793103448276e-05, "loss": 0.2347, "step": 7911 }, { "epoch": 27.282758620689656, "grad_norm": 0.9965476393699646, "learning_rate": 2.9697471264367818e-05, "loss": 0.2748, "step": 7912 }, { "epoch": 27.286206896551725, "grad_norm": 0.6637295484542847, "learning_rate": 2.9697011494252873e-05, "loss": 0.2515, "step": 7913 }, { "epoch": 27.28965517241379, "grad_norm": 1.5764626264572144, "learning_rate": 2.9696551724137932e-05, "loss": 0.2231, "step": 7914 }, { "epoch": 27.29310344827586, "grad_norm": 0.6269740462303162, "learning_rate": 2.969609195402299e-05, "loss": 0.2156, "step": 7915 }, { "epoch": 27.29655172413793, "grad_norm": 1.0458929538726807, "learning_rate": 2.9695632183908046e-05, "loss": 0.2779, "step": 7916 }, { "epoch": 27.3, "grad_norm": 0.5884857773780823, "learning_rate": 2.9695172413793105e-05, "loss": 0.2407, "step": 7917 }, { "epoch": 27.30344827586207, "grad_norm": 1.1437243223190308, "learning_rate": 2.969471264367816e-05, "loss": 0.2183, "step": 7918 }, { "epoch": 27.306896551724137, "grad_norm": 0.631580114364624, "learning_rate": 2.969425287356322e-05, "loss": 0.2128, "step": 7919 }, { "epoch": 27.310344827586206, "grad_norm": 1.4496670961380005, "learning_rate": 2.9693793103448277e-05, "loss": 0.2422, "step": 7920 }, { "epoch": 27.313793103448276, "grad_norm": 0.7034358382225037, "learning_rate": 2.9693333333333333e-05, "loss": 0.1865, "step": 7921 }, { "epoch": 27.317241379310346, "grad_norm": 1.0768671035766602, "learning_rate": 2.969287356321839e-05, "loss": 0.2353, "step": 7922 }, { "epoch": 27.320689655172412, "grad_norm": 1.3526263236999512, "learning_rate": 2.969241379310345e-05, "loss": 0.2041, "step": 7923 }, { "epoch": 27.324137931034482, "grad_norm": 0.7999768257141113, "learning_rate": 2.9691954022988505e-05, "loss": 0.2204, "step": 7924 }, { "epoch": 27.32758620689655, "grad_norm": 0.696520209312439, "learning_rate": 2.9691494252873564e-05, "loss": 0.2195, "step": 7925 }, { "epoch": 27.33103448275862, "grad_norm": 2.656719446182251, "learning_rate": 2.969103448275862e-05, "loss": 0.2357, "step": 7926 }, { "epoch": 27.33448275862069, "grad_norm": 1.3845690488815308, "learning_rate": 2.969057471264368e-05, "loss": 0.2286, "step": 7927 }, { "epoch": 27.337931034482757, "grad_norm": 0.8483383655548096, "learning_rate": 2.9690114942528737e-05, "loss": 0.2312, "step": 7928 }, { "epoch": 27.341379310344827, "grad_norm": 0.9618819355964661, "learning_rate": 2.9689655172413792e-05, "loss": 0.2292, "step": 7929 }, { "epoch": 27.344827586206897, "grad_norm": 1.4308350086212158, "learning_rate": 2.968919540229885e-05, "loss": 0.3347, "step": 7930 }, { "epoch": 27.348275862068967, "grad_norm": 0.9110429883003235, "learning_rate": 2.968873563218391e-05, "loss": 0.3086, "step": 7931 }, { "epoch": 27.351724137931033, "grad_norm": 0.6675607562065125, "learning_rate": 2.9688275862068968e-05, "loss": 0.275, "step": 7932 }, { "epoch": 27.355172413793102, "grad_norm": 0.6991917490959167, "learning_rate": 2.9687816091954023e-05, "loss": 0.2791, "step": 7933 }, { "epoch": 27.358620689655172, "grad_norm": 1.2435595989227295, "learning_rate": 2.968735632183908e-05, "loss": 0.2778, "step": 7934 }, { "epoch": 27.362068965517242, "grad_norm": 0.9864099621772766, "learning_rate": 2.968689655172414e-05, "loss": 0.2546, "step": 7935 }, { "epoch": 27.36551724137931, "grad_norm": 1.2702738046646118, "learning_rate": 2.9686436781609196e-05, "loss": 0.2369, "step": 7936 }, { "epoch": 27.368965517241378, "grad_norm": 0.8267258405685425, "learning_rate": 2.9685977011494254e-05, "loss": 0.247, "step": 7937 }, { "epoch": 27.372413793103448, "grad_norm": 1.4237947463989258, "learning_rate": 2.968551724137931e-05, "loss": 0.2165, "step": 7938 }, { "epoch": 27.375862068965517, "grad_norm": 0.6090787649154663, "learning_rate": 2.968505747126437e-05, "loss": 0.216, "step": 7939 }, { "epoch": 27.379310344827587, "grad_norm": 1.329795241355896, "learning_rate": 2.9684597701149427e-05, "loss": 0.2136, "step": 7940 }, { "epoch": 27.382758620689657, "grad_norm": 0.7252494096755981, "learning_rate": 2.9684137931034482e-05, "loss": 0.2404, "step": 7941 }, { "epoch": 27.386206896551723, "grad_norm": 1.5066823959350586, "learning_rate": 2.968367816091954e-05, "loss": 0.2096, "step": 7942 }, { "epoch": 27.389655172413793, "grad_norm": 0.9426292777061462, "learning_rate": 2.96832183908046e-05, "loss": 0.2137, "step": 7943 }, { "epoch": 27.393103448275863, "grad_norm": 0.8319422602653503, "learning_rate": 2.9682758620689655e-05, "loss": 0.2102, "step": 7944 }, { "epoch": 27.396551724137932, "grad_norm": 0.9256443381309509, "learning_rate": 2.9682298850574714e-05, "loss": 0.2685, "step": 7945 }, { "epoch": 27.4, "grad_norm": 1.0247671604156494, "learning_rate": 2.968183908045977e-05, "loss": 0.2377, "step": 7946 }, { "epoch": 27.40344827586207, "grad_norm": 0.680962324142456, "learning_rate": 2.9681379310344828e-05, "loss": 0.2306, "step": 7947 }, { "epoch": 27.406896551724138, "grad_norm": 0.9193511009216309, "learning_rate": 2.9680919540229886e-05, "loss": 0.2325, "step": 7948 }, { "epoch": 27.410344827586208, "grad_norm": 0.9217159152030945, "learning_rate": 2.9680459770114942e-05, "loss": 0.2213, "step": 7949 }, { "epoch": 27.413793103448278, "grad_norm": 2.5776164531707764, "learning_rate": 2.968e-05, "loss": 0.241, "step": 7950 }, { "epoch": 27.417241379310344, "grad_norm": 0.9929152131080627, "learning_rate": 2.967954022988506e-05, "loss": 0.2345, "step": 7951 }, { "epoch": 27.420689655172414, "grad_norm": 1.226743459701538, "learning_rate": 2.9679080459770114e-05, "loss": 0.2168, "step": 7952 }, { "epoch": 27.424137931034483, "grad_norm": 0.9319359064102173, "learning_rate": 2.9678620689655173e-05, "loss": 0.236, "step": 7953 }, { "epoch": 27.427586206896553, "grad_norm": 1.7574021816253662, "learning_rate": 2.967816091954023e-05, "loss": 0.2215, "step": 7954 }, { "epoch": 27.43103448275862, "grad_norm": 2.1972808837890625, "learning_rate": 2.967770114942529e-05, "loss": 0.3187, "step": 7955 }, { "epoch": 27.43448275862069, "grad_norm": 0.6297042965888977, "learning_rate": 2.9677241379310346e-05, "loss": 0.3117, "step": 7956 }, { "epoch": 27.43793103448276, "grad_norm": 0.7705216407775879, "learning_rate": 2.96767816091954e-05, "loss": 0.2921, "step": 7957 }, { "epoch": 27.44137931034483, "grad_norm": 1.2934318780899048, "learning_rate": 2.967632183908046e-05, "loss": 0.2738, "step": 7958 }, { "epoch": 27.444827586206898, "grad_norm": 0.5949136018753052, "learning_rate": 2.967586206896552e-05, "loss": 0.2563, "step": 7959 }, { "epoch": 27.448275862068964, "grad_norm": 0.6511861681938171, "learning_rate": 2.9675402298850577e-05, "loss": 0.2643, "step": 7960 }, { "epoch": 27.451724137931034, "grad_norm": 0.7806310653686523, "learning_rate": 2.9674942528735632e-05, "loss": 0.2513, "step": 7961 }, { "epoch": 27.455172413793104, "grad_norm": 0.5420365929603577, "learning_rate": 2.9674482758620688e-05, "loss": 0.2426, "step": 7962 }, { "epoch": 27.458620689655174, "grad_norm": 0.7857410311698914, "learning_rate": 2.967402298850575e-05, "loss": 0.2331, "step": 7963 }, { "epoch": 27.46206896551724, "grad_norm": 0.8543922305107117, "learning_rate": 2.9673563218390805e-05, "loss": 0.2457, "step": 7964 }, { "epoch": 27.46551724137931, "grad_norm": 1.3428256511688232, "learning_rate": 2.9673103448275864e-05, "loss": 0.2092, "step": 7965 }, { "epoch": 27.46896551724138, "grad_norm": 0.6461120247840881, "learning_rate": 2.967264367816092e-05, "loss": 0.2152, "step": 7966 }, { "epoch": 27.47241379310345, "grad_norm": 0.8212090134620667, "learning_rate": 2.9672183908045978e-05, "loss": 0.2312, "step": 7967 }, { "epoch": 27.47586206896552, "grad_norm": 0.9656428694725037, "learning_rate": 2.9671724137931036e-05, "loss": 0.2202, "step": 7968 }, { "epoch": 27.479310344827585, "grad_norm": 0.7196829319000244, "learning_rate": 2.9671264367816092e-05, "loss": 0.2392, "step": 7969 }, { "epoch": 27.482758620689655, "grad_norm": 1.804418683052063, "learning_rate": 2.967080459770115e-05, "loss": 0.2271, "step": 7970 }, { "epoch": 27.486206896551725, "grad_norm": 0.7732331156730652, "learning_rate": 2.967034482758621e-05, "loss": 0.2068, "step": 7971 }, { "epoch": 27.489655172413794, "grad_norm": 0.7216419577598572, "learning_rate": 2.9669885057471264e-05, "loss": 0.217, "step": 7972 }, { "epoch": 27.49310344827586, "grad_norm": 1.054513692855835, "learning_rate": 2.9669425287356323e-05, "loss": 0.2338, "step": 7973 }, { "epoch": 27.49655172413793, "grad_norm": 2.0680158138275146, "learning_rate": 2.966896551724138e-05, "loss": 0.2171, "step": 7974 }, { "epoch": 27.5, "grad_norm": 0.6643990874290466, "learning_rate": 2.9668505747126437e-05, "loss": 0.1997, "step": 7975 }, { "epoch": 27.50344827586207, "grad_norm": 1.1726727485656738, "learning_rate": 2.9668045977011496e-05, "loss": 0.2092, "step": 7976 }, { "epoch": 27.50689655172414, "grad_norm": 1.289300560951233, "learning_rate": 2.966758620689655e-05, "loss": 0.2116, "step": 7977 }, { "epoch": 27.510344827586206, "grad_norm": 1.1293034553527832, "learning_rate": 2.966712643678161e-05, "loss": 0.2067, "step": 7978 }, { "epoch": 27.513793103448275, "grad_norm": 2.0670323371887207, "learning_rate": 2.966666666666667e-05, "loss": 0.2836, "step": 7979 }, { "epoch": 27.517241379310345, "grad_norm": 1.5459064245224, "learning_rate": 2.9666206896551724e-05, "loss": 0.2942, "step": 7980 }, { "epoch": 27.520689655172415, "grad_norm": 0.7367529273033142, "learning_rate": 2.9665747126436782e-05, "loss": 0.314, "step": 7981 }, { "epoch": 27.52413793103448, "grad_norm": 1.571545124053955, "learning_rate": 2.9665287356321838e-05, "loss": 0.2813, "step": 7982 }, { "epoch": 27.52758620689655, "grad_norm": 0.9867838025093079, "learning_rate": 2.96648275862069e-05, "loss": 0.2699, "step": 7983 }, { "epoch": 27.53103448275862, "grad_norm": 0.5630431771278381, "learning_rate": 2.9664367816091955e-05, "loss": 0.2585, "step": 7984 }, { "epoch": 27.53448275862069, "grad_norm": 1.285261631011963, "learning_rate": 2.966390804597701e-05, "loss": 0.2632, "step": 7985 }, { "epoch": 27.53793103448276, "grad_norm": 0.888590931892395, "learning_rate": 2.966344827586207e-05, "loss": 0.2359, "step": 7986 }, { "epoch": 27.541379310344826, "grad_norm": 0.5797366499900818, "learning_rate": 2.9662988505747128e-05, "loss": 0.2404, "step": 7987 }, { "epoch": 27.544827586206896, "grad_norm": 0.6421138644218445, "learning_rate": 2.9662528735632186e-05, "loss": 0.2453, "step": 7988 }, { "epoch": 27.548275862068966, "grad_norm": 1.356542944908142, "learning_rate": 2.966206896551724e-05, "loss": 0.2374, "step": 7989 }, { "epoch": 27.551724137931036, "grad_norm": 0.7000864744186401, "learning_rate": 2.9661609195402297e-05, "loss": 0.2293, "step": 7990 }, { "epoch": 27.555172413793102, "grad_norm": 0.8153185248374939, "learning_rate": 2.966114942528736e-05, "loss": 0.2368, "step": 7991 }, { "epoch": 27.55862068965517, "grad_norm": 0.531346321105957, "learning_rate": 2.9660689655172414e-05, "loss": 0.2077, "step": 7992 }, { "epoch": 27.56206896551724, "grad_norm": 0.6629143357276917, "learning_rate": 2.9660229885057473e-05, "loss": 0.2328, "step": 7993 }, { "epoch": 27.56551724137931, "grad_norm": 0.5820608139038086, "learning_rate": 2.9659770114942528e-05, "loss": 0.2412, "step": 7994 }, { "epoch": 27.56896551724138, "grad_norm": 0.643884003162384, "learning_rate": 2.9659310344827587e-05, "loss": 0.2187, "step": 7995 }, { "epoch": 27.572413793103447, "grad_norm": 0.654169499874115, "learning_rate": 2.9658850574712646e-05, "loss": 0.2167, "step": 7996 }, { "epoch": 27.575862068965517, "grad_norm": 0.6747252941131592, "learning_rate": 2.96583908045977e-05, "loss": 0.213, "step": 7997 }, { "epoch": 27.579310344827586, "grad_norm": 1.354038119316101, "learning_rate": 2.965793103448276e-05, "loss": 0.218, "step": 7998 }, { "epoch": 27.582758620689656, "grad_norm": 0.8889662623405457, "learning_rate": 2.9657471264367818e-05, "loss": 0.191, "step": 7999 }, { "epoch": 27.586206896551722, "grad_norm": 0.8103294968605042, "learning_rate": 2.9657011494252874e-05, "loss": 0.2463, "step": 8000 }, { "epoch": 27.586206896551722, "eval_cer": 0.1324260934560169, "eval_loss": 0.3152608573436737, "eval_runtime": 17.4043, "eval_samples_per_second": 53.263, "eval_steps_per_second": 0.172, "eval_wer": 0.3088768115942029, "step": 8000 }, { "epoch": 27.589655172413792, "grad_norm": 2.357807159423828, "learning_rate": 2.9656551724137932e-05, "loss": 0.2383, "step": 8001 }, { "epoch": 27.593103448275862, "grad_norm": 1.5521420240402222, "learning_rate": 2.9656091954022988e-05, "loss": 0.2048, "step": 8002 }, { "epoch": 27.59655172413793, "grad_norm": 0.8351204991340637, "learning_rate": 2.965563218390805e-05, "loss": 0.2439, "step": 8003 }, { "epoch": 27.6, "grad_norm": 1.5536282062530518, "learning_rate": 2.9655172413793105e-05, "loss": 0.2309, "step": 8004 }, { "epoch": 27.603448275862068, "grad_norm": 2.9743831157684326, "learning_rate": 2.965471264367816e-05, "loss": 0.3205, "step": 8005 }, { "epoch": 27.606896551724137, "grad_norm": 0.6631364226341248, "learning_rate": 2.965425287356322e-05, "loss": 0.2972, "step": 8006 }, { "epoch": 27.610344827586207, "grad_norm": 0.43888136744499207, "learning_rate": 2.9653793103448278e-05, "loss": 0.2381, "step": 8007 }, { "epoch": 27.613793103448277, "grad_norm": 1.0660200119018555, "learning_rate": 2.9653333333333333e-05, "loss": 0.2521, "step": 8008 }, { "epoch": 27.617241379310343, "grad_norm": 0.575221061706543, "learning_rate": 2.965287356321839e-05, "loss": 0.2774, "step": 8009 }, { "epoch": 27.620689655172413, "grad_norm": 0.5864955186843872, "learning_rate": 2.9652413793103447e-05, "loss": 0.2318, "step": 8010 }, { "epoch": 27.624137931034483, "grad_norm": 0.6238292455673218, "learning_rate": 2.965195402298851e-05, "loss": 0.2453, "step": 8011 }, { "epoch": 27.627586206896552, "grad_norm": 0.7001510858535767, "learning_rate": 2.9651494252873564e-05, "loss": 0.2459, "step": 8012 }, { "epoch": 27.631034482758622, "grad_norm": 0.5803565382957458, "learning_rate": 2.965103448275862e-05, "loss": 0.2158, "step": 8013 }, { "epoch": 27.63448275862069, "grad_norm": 0.5088173747062683, "learning_rate": 2.9650574712643678e-05, "loss": 0.2136, "step": 8014 }, { "epoch": 27.637931034482758, "grad_norm": 0.6340955495834351, "learning_rate": 2.9650114942528737e-05, "loss": 0.2206, "step": 8015 }, { "epoch": 27.641379310344828, "grad_norm": 4.372450351715088, "learning_rate": 2.9649655172413796e-05, "loss": 0.2098, "step": 8016 }, { "epoch": 27.644827586206898, "grad_norm": 0.674708902835846, "learning_rate": 2.964919540229885e-05, "loss": 0.2722, "step": 8017 }, { "epoch": 27.648275862068964, "grad_norm": 0.6989539265632629, "learning_rate": 2.9648735632183906e-05, "loss": 0.239, "step": 8018 }, { "epoch": 27.651724137931033, "grad_norm": 0.7451843619346619, "learning_rate": 2.9648275862068968e-05, "loss": 0.2223, "step": 8019 }, { "epoch": 27.655172413793103, "grad_norm": 0.6028863191604614, "learning_rate": 2.9647816091954024e-05, "loss": 0.224, "step": 8020 }, { "epoch": 27.658620689655173, "grad_norm": 0.6043135523796082, "learning_rate": 2.9647356321839082e-05, "loss": 0.198, "step": 8021 }, { "epoch": 27.662068965517243, "grad_norm": 1.724177598953247, "learning_rate": 2.9646896551724138e-05, "loss": 0.2392, "step": 8022 }, { "epoch": 27.66551724137931, "grad_norm": 0.7591499090194702, "learning_rate": 2.9646436781609196e-05, "loss": 0.206, "step": 8023 }, { "epoch": 27.66896551724138, "grad_norm": 0.7991400957107544, "learning_rate": 2.9645977011494255e-05, "loss": 0.2012, "step": 8024 }, { "epoch": 27.67241379310345, "grad_norm": 0.808287501335144, "learning_rate": 2.964551724137931e-05, "loss": 0.2076, "step": 8025 }, { "epoch": 27.675862068965518, "grad_norm": 0.6673450469970703, "learning_rate": 2.964505747126437e-05, "loss": 0.2283, "step": 8026 }, { "epoch": 27.679310344827588, "grad_norm": 1.0817428827285767, "learning_rate": 2.9644597701149428e-05, "loss": 0.201, "step": 8027 }, { "epoch": 27.682758620689654, "grad_norm": 0.9698562026023865, "learning_rate": 2.9644137931034483e-05, "loss": 0.2163, "step": 8028 }, { "epoch": 27.686206896551724, "grad_norm": 2.7709319591522217, "learning_rate": 2.964367816091954e-05, "loss": 0.1932, "step": 8029 }, { "epoch": 27.689655172413794, "grad_norm": 5.237761497497559, "learning_rate": 2.9643218390804597e-05, "loss": 0.3925, "step": 8030 }, { "epoch": 27.693103448275863, "grad_norm": 0.749861478805542, "learning_rate": 2.964275862068966e-05, "loss": 0.3252, "step": 8031 }, { "epoch": 27.69655172413793, "grad_norm": 0.7996978759765625, "learning_rate": 2.9642298850574714e-05, "loss": 0.275, "step": 8032 }, { "epoch": 27.7, "grad_norm": 0.528333842754364, "learning_rate": 2.964183908045977e-05, "loss": 0.2228, "step": 8033 }, { "epoch": 27.70344827586207, "grad_norm": 0.9038131237030029, "learning_rate": 2.9641379310344828e-05, "loss": 0.2799, "step": 8034 }, { "epoch": 27.70689655172414, "grad_norm": 0.7060282826423645, "learning_rate": 2.9640919540229887e-05, "loss": 0.2241, "step": 8035 }, { "epoch": 27.71034482758621, "grad_norm": 0.919768750667572, "learning_rate": 2.9640459770114942e-05, "loss": 0.231, "step": 8036 }, { "epoch": 27.713793103448275, "grad_norm": 1.34187650680542, "learning_rate": 2.964e-05, "loss": 0.2199, "step": 8037 }, { "epoch": 27.717241379310344, "grad_norm": 0.9951978325843811, "learning_rate": 2.9639540229885056e-05, "loss": 0.2557, "step": 8038 }, { "epoch": 27.720689655172414, "grad_norm": 0.9325481653213501, "learning_rate": 2.9639080459770118e-05, "loss": 0.2361, "step": 8039 }, { "epoch": 27.724137931034484, "grad_norm": 0.6481718420982361, "learning_rate": 2.9638620689655173e-05, "loss": 0.2301, "step": 8040 }, { "epoch": 27.72758620689655, "grad_norm": 0.7103378176689148, "learning_rate": 2.963816091954023e-05, "loss": 0.2253, "step": 8041 }, { "epoch": 27.73103448275862, "grad_norm": 1.1607369184494019, "learning_rate": 2.9637701149425287e-05, "loss": 0.2242, "step": 8042 }, { "epoch": 27.73448275862069, "grad_norm": 0.7972998023033142, "learning_rate": 2.9637241379310346e-05, "loss": 0.2445, "step": 8043 }, { "epoch": 27.73793103448276, "grad_norm": 0.949785053730011, "learning_rate": 2.9636781609195405e-05, "loss": 0.2377, "step": 8044 }, { "epoch": 27.74137931034483, "grad_norm": 0.6090638041496277, "learning_rate": 2.963632183908046e-05, "loss": 0.2438, "step": 8045 }, { "epoch": 27.744827586206895, "grad_norm": 0.6305696964263916, "learning_rate": 2.9635862068965515e-05, "loss": 0.2091, "step": 8046 }, { "epoch": 27.748275862068965, "grad_norm": 1.1685103178024292, "learning_rate": 2.9635402298850577e-05, "loss": 0.2222, "step": 8047 }, { "epoch": 27.751724137931035, "grad_norm": 0.8212053179740906, "learning_rate": 2.9634942528735633e-05, "loss": 0.2198, "step": 8048 }, { "epoch": 27.755172413793105, "grad_norm": 1.245360255241394, "learning_rate": 2.963448275862069e-05, "loss": 0.2342, "step": 8049 }, { "epoch": 27.75862068965517, "grad_norm": 1.0074892044067383, "learning_rate": 2.9634022988505747e-05, "loss": 0.2276, "step": 8050 }, { "epoch": 27.76206896551724, "grad_norm": 0.9022911787033081, "learning_rate": 2.9633563218390805e-05, "loss": 0.2525, "step": 8051 }, { "epoch": 27.76551724137931, "grad_norm": 1.1119928359985352, "learning_rate": 2.9633103448275864e-05, "loss": 0.2385, "step": 8052 }, { "epoch": 27.76896551724138, "grad_norm": 0.6947523355484009, "learning_rate": 2.963264367816092e-05, "loss": 0.1875, "step": 8053 }, { "epoch": 27.77241379310345, "grad_norm": 0.9298329949378967, "learning_rate": 2.9632183908045978e-05, "loss": 0.2617, "step": 8054 }, { "epoch": 27.775862068965516, "grad_norm": 1.6343857049942017, "learning_rate": 2.9631724137931037e-05, "loss": 0.3025, "step": 8055 }, { "epoch": 27.779310344827586, "grad_norm": 0.6148260235786438, "learning_rate": 2.9631264367816092e-05, "loss": 0.275, "step": 8056 }, { "epoch": 27.782758620689656, "grad_norm": 0.6363769769668579, "learning_rate": 2.963080459770115e-05, "loss": 0.2405, "step": 8057 }, { "epoch": 27.786206896551725, "grad_norm": 0.6320227980613708, "learning_rate": 2.9630344827586206e-05, "loss": 0.2734, "step": 8058 }, { "epoch": 27.78965517241379, "grad_norm": 0.5837138295173645, "learning_rate": 2.9629885057471268e-05, "loss": 0.2369, "step": 8059 }, { "epoch": 27.79310344827586, "grad_norm": 1.2616052627563477, "learning_rate": 2.9629425287356323e-05, "loss": 0.2171, "step": 8060 }, { "epoch": 27.79655172413793, "grad_norm": 0.7830363512039185, "learning_rate": 2.962896551724138e-05, "loss": 0.2498, "step": 8061 }, { "epoch": 27.8, "grad_norm": 0.6122834086418152, "learning_rate": 2.9628505747126437e-05, "loss": 0.2305, "step": 8062 }, { "epoch": 27.80344827586207, "grad_norm": 0.5561302304267883, "learning_rate": 2.9628045977011496e-05, "loss": 0.2398, "step": 8063 }, { "epoch": 27.806896551724137, "grad_norm": 1.3613405227661133, "learning_rate": 2.962758620689655e-05, "loss": 0.2105, "step": 8064 }, { "epoch": 27.810344827586206, "grad_norm": 0.8954808712005615, "learning_rate": 2.962712643678161e-05, "loss": 0.2577, "step": 8065 }, { "epoch": 27.813793103448276, "grad_norm": 2.2944223880767822, "learning_rate": 2.9626666666666665e-05, "loss": 0.2377, "step": 8066 }, { "epoch": 27.817241379310346, "grad_norm": 0.8221507668495178, "learning_rate": 2.9626206896551727e-05, "loss": 0.2185, "step": 8067 }, { "epoch": 27.820689655172412, "grad_norm": 2.8907370567321777, "learning_rate": 2.9625747126436783e-05, "loss": 0.2298, "step": 8068 }, { "epoch": 27.824137931034482, "grad_norm": 0.7098746299743652, "learning_rate": 2.9625287356321838e-05, "loss": 0.2351, "step": 8069 }, { "epoch": 27.82758620689655, "grad_norm": 0.8393989205360413, "learning_rate": 2.9624827586206897e-05, "loss": 0.2416, "step": 8070 }, { "epoch": 27.83103448275862, "grad_norm": 0.8666374087333679, "learning_rate": 2.9624367816091955e-05, "loss": 0.199, "step": 8071 }, { "epoch": 27.83448275862069, "grad_norm": 0.9631679654121399, "learning_rate": 2.9623908045977014e-05, "loss": 0.2149, "step": 8072 }, { "epoch": 27.837931034482757, "grad_norm": 1.4077469110488892, "learning_rate": 2.962344827586207e-05, "loss": 0.2185, "step": 8073 }, { "epoch": 27.841379310344827, "grad_norm": 0.7764573097229004, "learning_rate": 2.9622988505747125e-05, "loss": 0.2412, "step": 8074 }, { "epoch": 27.844827586206897, "grad_norm": 0.8217126131057739, "learning_rate": 2.9622528735632187e-05, "loss": 0.2255, "step": 8075 }, { "epoch": 27.848275862068967, "grad_norm": 0.9697218537330627, "learning_rate": 2.9622068965517242e-05, "loss": 0.2113, "step": 8076 }, { "epoch": 27.851724137931036, "grad_norm": 1.2910257577896118, "learning_rate": 2.96216091954023e-05, "loss": 0.2101, "step": 8077 }, { "epoch": 27.855172413793102, "grad_norm": 1.261801838874817, "learning_rate": 2.9621149425287356e-05, "loss": 0.2015, "step": 8078 }, { "epoch": 27.858620689655172, "grad_norm": 2.169503688812256, "learning_rate": 2.9620689655172415e-05, "loss": 0.2591, "step": 8079 }, { "epoch": 27.862068965517242, "grad_norm": 1.3774718046188354, "learning_rate": 2.9620229885057473e-05, "loss": 0.2566, "step": 8080 }, { "epoch": 27.86551724137931, "grad_norm": 0.9168815612792969, "learning_rate": 2.961977011494253e-05, "loss": 0.3029, "step": 8081 }, { "epoch": 27.868965517241378, "grad_norm": 0.6293153166770935, "learning_rate": 2.9619310344827587e-05, "loss": 0.2505, "step": 8082 }, { "epoch": 27.872413793103448, "grad_norm": 0.5543842315673828, "learning_rate": 2.9618850574712646e-05, "loss": 0.2618, "step": 8083 }, { "epoch": 27.875862068965517, "grad_norm": 0.5921515226364136, "learning_rate": 2.96183908045977e-05, "loss": 0.2552, "step": 8084 }, { "epoch": 27.879310344827587, "grad_norm": 0.7193567156791687, "learning_rate": 2.961793103448276e-05, "loss": 0.2479, "step": 8085 }, { "epoch": 27.882758620689657, "grad_norm": 0.5195422768592834, "learning_rate": 2.9617471264367815e-05, "loss": 0.2346, "step": 8086 }, { "epoch": 27.886206896551723, "grad_norm": 0.6648915410041809, "learning_rate": 2.9617011494252877e-05, "loss": 0.2841, "step": 8087 }, { "epoch": 27.889655172413793, "grad_norm": 0.6162506937980652, "learning_rate": 2.9616551724137933e-05, "loss": 0.2402, "step": 8088 }, { "epoch": 27.893103448275863, "grad_norm": 1.0964301824569702, "learning_rate": 2.9616091954022988e-05, "loss": 0.2267, "step": 8089 }, { "epoch": 27.896551724137932, "grad_norm": 1.1767257452011108, "learning_rate": 2.9615632183908047e-05, "loss": 0.2446, "step": 8090 }, { "epoch": 27.9, "grad_norm": 0.7175918817520142, "learning_rate": 2.9615172413793105e-05, "loss": 0.2202, "step": 8091 }, { "epoch": 27.90344827586207, "grad_norm": 0.7163825035095215, "learning_rate": 2.9614712643678164e-05, "loss": 0.2298, "step": 8092 }, { "epoch": 27.906896551724138, "grad_norm": 0.7953805327415466, "learning_rate": 2.961425287356322e-05, "loss": 0.2325, "step": 8093 }, { "epoch": 27.910344827586208, "grad_norm": 1.5353559255599976, "learning_rate": 2.9613793103448275e-05, "loss": 0.2187, "step": 8094 }, { "epoch": 27.913793103448278, "grad_norm": 1.6902577877044678, "learning_rate": 2.9613333333333337e-05, "loss": 0.2183, "step": 8095 }, { "epoch": 27.917241379310344, "grad_norm": 1.0104937553405762, "learning_rate": 2.9612873563218392e-05, "loss": 0.2408, "step": 8096 }, { "epoch": 27.920689655172414, "grad_norm": 0.7826731204986572, "learning_rate": 2.9612413793103447e-05, "loss": 0.2055, "step": 8097 }, { "epoch": 27.924137931034483, "grad_norm": 0.9934120178222656, "learning_rate": 2.9611954022988506e-05, "loss": 0.2442, "step": 8098 }, { "epoch": 27.927586206896553, "grad_norm": 0.9662966728210449, "learning_rate": 2.9611494252873565e-05, "loss": 0.2054, "step": 8099 }, { "epoch": 27.93103448275862, "grad_norm": 0.8010696172714233, "learning_rate": 2.9611034482758623e-05, "loss": 0.2235, "step": 8100 }, { "epoch": 27.93448275862069, "grad_norm": 1.0461158752441406, "learning_rate": 2.961057471264368e-05, "loss": 0.22, "step": 8101 }, { "epoch": 27.93793103448276, "grad_norm": 1.2171363830566406, "learning_rate": 2.9610114942528734e-05, "loss": 0.2134, "step": 8102 }, { "epoch": 27.94137931034483, "grad_norm": 2.571756601333618, "learning_rate": 2.9609655172413796e-05, "loss": 0.2518, "step": 8103 }, { "epoch": 27.944827586206898, "grad_norm": 1.6651136875152588, "learning_rate": 2.960919540229885e-05, "loss": 0.2725, "step": 8104 }, { "epoch": 27.948275862068964, "grad_norm": 1.6579197645187378, "learning_rate": 2.960873563218391e-05, "loss": 0.3062, "step": 8105 }, { "epoch": 27.951724137931034, "grad_norm": 1.300240159034729, "learning_rate": 2.9608275862068965e-05, "loss": 0.2793, "step": 8106 }, { "epoch": 27.955172413793104, "grad_norm": 1.3805524110794067, "learning_rate": 2.9607816091954024e-05, "loss": 0.2462, "step": 8107 }, { "epoch": 27.958620689655174, "grad_norm": 0.9648283123970032, "learning_rate": 2.9607356321839083e-05, "loss": 0.2277, "step": 8108 }, { "epoch": 27.96206896551724, "grad_norm": 0.7317817211151123, "learning_rate": 2.9606896551724138e-05, "loss": 0.2598, "step": 8109 }, { "epoch": 27.96551724137931, "grad_norm": 0.951326847076416, "learning_rate": 2.9606436781609197e-05, "loss": 0.2295, "step": 8110 }, { "epoch": 27.96896551724138, "grad_norm": 0.9639319181442261, "learning_rate": 2.9605977011494255e-05, "loss": 0.2185, "step": 8111 }, { "epoch": 27.97241379310345, "grad_norm": 0.5270469188690186, "learning_rate": 2.960551724137931e-05, "loss": 0.2373, "step": 8112 }, { "epoch": 27.97586206896552, "grad_norm": 0.6265546679496765, "learning_rate": 2.960505747126437e-05, "loss": 0.2056, "step": 8113 }, { "epoch": 27.979310344827585, "grad_norm": 0.6927165985107422, "learning_rate": 2.9604597701149425e-05, "loss": 0.2394, "step": 8114 }, { "epoch": 27.982758620689655, "grad_norm": 0.5846370458602905, "learning_rate": 2.9604137931034487e-05, "loss": 0.2095, "step": 8115 }, { "epoch": 27.986206896551725, "grad_norm": 0.5572791695594788, "learning_rate": 2.9603678160919542e-05, "loss": 0.2304, "step": 8116 }, { "epoch": 27.989655172413794, "grad_norm": 0.8045250773429871, "learning_rate": 2.9603218390804597e-05, "loss": 0.2422, "step": 8117 }, { "epoch": 27.99310344827586, "grad_norm": 1.8152981996536255, "learning_rate": 2.9602758620689656e-05, "loss": 0.2131, "step": 8118 }, { "epoch": 27.99655172413793, "grad_norm": 1.4069015979766846, "learning_rate": 2.9602298850574715e-05, "loss": 0.2307, "step": 8119 }, { "epoch": 28.0, "grad_norm": 2.1620469093322754, "learning_rate": 2.9601839080459773e-05, "loss": 0.3136, "step": 8120 }, { "epoch": 28.00344827586207, "grad_norm": 0.9255272746086121, "learning_rate": 2.960137931034483e-05, "loss": 0.3255, "step": 8121 }, { "epoch": 28.00689655172414, "grad_norm": 0.4959462583065033, "learning_rate": 2.9600919540229884e-05, "loss": 0.235, "step": 8122 }, { "epoch": 28.010344827586206, "grad_norm": 0.5091273784637451, "learning_rate": 2.9600459770114946e-05, "loss": 0.2551, "step": 8123 }, { "epoch": 28.013793103448275, "grad_norm": 1.0608997344970703, "learning_rate": 2.96e-05, "loss": 0.2497, "step": 8124 }, { "epoch": 28.017241379310345, "grad_norm": 0.5584940314292908, "learning_rate": 2.9599540229885056e-05, "loss": 0.2036, "step": 8125 }, { "epoch": 28.020689655172415, "grad_norm": 0.6935164332389832, "learning_rate": 2.9599080459770115e-05, "loss": 0.2403, "step": 8126 }, { "epoch": 28.02413793103448, "grad_norm": 0.778094470500946, "learning_rate": 2.9598620689655174e-05, "loss": 0.2521, "step": 8127 }, { "epoch": 28.02758620689655, "grad_norm": 0.7965030670166016, "learning_rate": 2.9598160919540233e-05, "loss": 0.2397, "step": 8128 }, { "epoch": 28.03103448275862, "grad_norm": 0.5871703624725342, "learning_rate": 2.9597701149425288e-05, "loss": 0.2237, "step": 8129 }, { "epoch": 28.03448275862069, "grad_norm": 2.4312212467193604, "learning_rate": 2.9597241379310343e-05, "loss": 0.2129, "step": 8130 }, { "epoch": 28.03793103448276, "grad_norm": 0.7593165040016174, "learning_rate": 2.9596781609195405e-05, "loss": 0.1979, "step": 8131 }, { "epoch": 28.041379310344826, "grad_norm": 0.7147059440612793, "learning_rate": 2.959632183908046e-05, "loss": 0.2185, "step": 8132 }, { "epoch": 28.044827586206896, "grad_norm": 0.5796089768409729, "learning_rate": 2.959586206896552e-05, "loss": 0.2383, "step": 8133 }, { "epoch": 28.048275862068966, "grad_norm": 0.6932573318481445, "learning_rate": 2.9595402298850574e-05, "loss": 0.2065, "step": 8134 }, { "epoch": 28.051724137931036, "grad_norm": 0.7993543744087219, "learning_rate": 2.9594942528735633e-05, "loss": 0.2307, "step": 8135 }, { "epoch": 28.055172413793102, "grad_norm": 0.7426981329917908, "learning_rate": 2.9594482758620692e-05, "loss": 0.2039, "step": 8136 }, { "epoch": 28.05862068965517, "grad_norm": 1.3155856132507324, "learning_rate": 2.9594022988505747e-05, "loss": 0.1989, "step": 8137 }, { "epoch": 28.06206896551724, "grad_norm": 1.0210449695587158, "learning_rate": 2.9593563218390806e-05, "loss": 0.1929, "step": 8138 }, { "epoch": 28.06551724137931, "grad_norm": 0.8812614679336548, "learning_rate": 2.9593103448275865e-05, "loss": 0.2441, "step": 8139 }, { "epoch": 28.06896551724138, "grad_norm": 1.0228407382965088, "learning_rate": 2.959264367816092e-05, "loss": 0.2261, "step": 8140 }, { "epoch": 28.072413793103447, "grad_norm": 5.022749423980713, "learning_rate": 2.959218390804598e-05, "loss": 0.1992, "step": 8141 }, { "epoch": 28.075862068965517, "grad_norm": 0.8490011692047119, "learning_rate": 2.9591724137931034e-05, "loss": 0.2029, "step": 8142 }, { "epoch": 28.079310344827586, "grad_norm": 2.0915749073028564, "learning_rate": 2.9591264367816096e-05, "loss": 0.2015, "step": 8143 }, { "epoch": 28.082758620689656, "grad_norm": 1.5137197971343994, "learning_rate": 2.959080459770115e-05, "loss": 0.2477, "step": 8144 }, { "epoch": 28.086206896551722, "grad_norm": 1.8281251192092896, "learning_rate": 2.9590344827586206e-05, "loss": 0.277, "step": 8145 }, { "epoch": 28.089655172413792, "grad_norm": 0.6239591240882874, "learning_rate": 2.9589885057471265e-05, "loss": 0.3174, "step": 8146 }, { "epoch": 28.093103448275862, "grad_norm": 0.47711315751075745, "learning_rate": 2.9589425287356324e-05, "loss": 0.2538, "step": 8147 }, { "epoch": 28.09655172413793, "grad_norm": 0.4692818224430084, "learning_rate": 2.9588965517241382e-05, "loss": 0.2251, "step": 8148 }, { "epoch": 28.1, "grad_norm": 0.5126651525497437, "learning_rate": 2.9588505747126438e-05, "loss": 0.2448, "step": 8149 }, { "epoch": 28.103448275862068, "grad_norm": 0.6440304517745972, "learning_rate": 2.9588045977011493e-05, "loss": 0.2115, "step": 8150 }, { "epoch": 28.106896551724137, "grad_norm": 0.5416520237922668, "learning_rate": 2.9587586206896555e-05, "loss": 0.2317, "step": 8151 }, { "epoch": 28.110344827586207, "grad_norm": 0.7605284452438354, "learning_rate": 2.958712643678161e-05, "loss": 0.225, "step": 8152 }, { "epoch": 28.113793103448277, "grad_norm": 0.827438473701477, "learning_rate": 2.9586666666666666e-05, "loss": 0.2288, "step": 8153 }, { "epoch": 28.117241379310343, "grad_norm": 0.5154626369476318, "learning_rate": 2.9586206896551724e-05, "loss": 0.1984, "step": 8154 }, { "epoch": 28.120689655172413, "grad_norm": 1.26478111743927, "learning_rate": 2.958574712643678e-05, "loss": 0.232, "step": 8155 }, { "epoch": 28.124137931034483, "grad_norm": 0.8421079516410828, "learning_rate": 2.9585287356321842e-05, "loss": 0.2205, "step": 8156 }, { "epoch": 28.127586206896552, "grad_norm": 0.5849840641021729, "learning_rate": 2.9584827586206897e-05, "loss": 0.2448, "step": 8157 }, { "epoch": 28.131034482758622, "grad_norm": 0.7636149525642395, "learning_rate": 2.9584367816091952e-05, "loss": 0.2092, "step": 8158 }, { "epoch": 28.13448275862069, "grad_norm": 0.6824294924736023, "learning_rate": 2.958390804597701e-05, "loss": 0.1979, "step": 8159 }, { "epoch": 28.137931034482758, "grad_norm": 0.8022952079772949, "learning_rate": 2.958344827586207e-05, "loss": 0.2296, "step": 8160 }, { "epoch": 28.141379310344828, "grad_norm": 1.5201189517974854, "learning_rate": 2.958298850574713e-05, "loss": 0.2245, "step": 8161 }, { "epoch": 28.144827586206898, "grad_norm": 0.9325366020202637, "learning_rate": 2.9582528735632184e-05, "loss": 0.2303, "step": 8162 }, { "epoch": 28.148275862068967, "grad_norm": 1.1626808643341064, "learning_rate": 2.958206896551724e-05, "loss": 0.2093, "step": 8163 }, { "epoch": 28.151724137931033, "grad_norm": 1.1635875701904297, "learning_rate": 2.95816091954023e-05, "loss": 0.2147, "step": 8164 }, { "epoch": 28.155172413793103, "grad_norm": 1.6512339115142822, "learning_rate": 2.9581149425287356e-05, "loss": 0.207, "step": 8165 }, { "epoch": 28.158620689655173, "grad_norm": 2.0482919216156006, "learning_rate": 2.9580689655172415e-05, "loss": 0.2194, "step": 8166 }, { "epoch": 28.162068965517243, "grad_norm": 0.7976782917976379, "learning_rate": 2.958022988505747e-05, "loss": 0.2027, "step": 8167 }, { "epoch": 28.16551724137931, "grad_norm": 0.7892001867294312, "learning_rate": 2.957977011494253e-05, "loss": 0.2104, "step": 8168 }, { "epoch": 28.16896551724138, "grad_norm": 2.163940906524658, "learning_rate": 2.9579310344827588e-05, "loss": 0.2517, "step": 8169 }, { "epoch": 28.17241379310345, "grad_norm": 1.3348132371902466, "learning_rate": 2.9578850574712643e-05, "loss": 0.3017, "step": 8170 }, { "epoch": 28.175862068965518, "grad_norm": 0.7743703722953796, "learning_rate": 2.9578390804597702e-05, "loss": 0.2939, "step": 8171 }, { "epoch": 28.179310344827588, "grad_norm": 0.8536284565925598, "learning_rate": 2.957793103448276e-05, "loss": 0.2723, "step": 8172 }, { "epoch": 28.182758620689654, "grad_norm": 0.7152978777885437, "learning_rate": 2.9577471264367816e-05, "loss": 0.2214, "step": 8173 }, { "epoch": 28.186206896551724, "grad_norm": 1.1237210035324097, "learning_rate": 2.9577011494252874e-05, "loss": 0.2769, "step": 8174 }, { "epoch": 28.189655172413794, "grad_norm": 0.802254319190979, "learning_rate": 2.957655172413793e-05, "loss": 0.2526, "step": 8175 }, { "epoch": 28.193103448275863, "grad_norm": 0.5940929651260376, "learning_rate": 2.9576091954022992e-05, "loss": 0.2265, "step": 8176 }, { "epoch": 28.19655172413793, "grad_norm": 0.7357165813446045, "learning_rate": 2.9575632183908047e-05, "loss": 0.2251, "step": 8177 }, { "epoch": 28.2, "grad_norm": 0.8475888967514038, "learning_rate": 2.9575172413793102e-05, "loss": 0.2421, "step": 8178 }, { "epoch": 28.20344827586207, "grad_norm": 0.8915043473243713, "learning_rate": 2.957471264367816e-05, "loss": 0.2564, "step": 8179 }, { "epoch": 28.20689655172414, "grad_norm": 0.6617788672447205, "learning_rate": 2.957425287356322e-05, "loss": 0.2198, "step": 8180 }, { "epoch": 28.21034482758621, "grad_norm": 0.9319981336593628, "learning_rate": 2.957379310344828e-05, "loss": 0.212, "step": 8181 }, { "epoch": 28.213793103448275, "grad_norm": 0.7057940363883972, "learning_rate": 2.9573333333333334e-05, "loss": 0.2008, "step": 8182 }, { "epoch": 28.217241379310344, "grad_norm": 0.5067653656005859, "learning_rate": 2.957287356321839e-05, "loss": 0.1896, "step": 8183 }, { "epoch": 28.220689655172414, "grad_norm": 1.0309128761291504, "learning_rate": 2.957241379310345e-05, "loss": 0.2047, "step": 8184 }, { "epoch": 28.224137931034484, "grad_norm": 0.6312376856803894, "learning_rate": 2.9571954022988506e-05, "loss": 0.183, "step": 8185 }, { "epoch": 28.22758620689655, "grad_norm": 0.9855659008026123, "learning_rate": 2.957149425287356e-05, "loss": 0.2105, "step": 8186 }, { "epoch": 28.23103448275862, "grad_norm": 1.6396808624267578, "learning_rate": 2.957103448275862e-05, "loss": 0.2377, "step": 8187 }, { "epoch": 28.23448275862069, "grad_norm": 0.7919644713401794, "learning_rate": 2.957057471264368e-05, "loss": 0.2015, "step": 8188 }, { "epoch": 28.23793103448276, "grad_norm": 1.163018822669983, "learning_rate": 2.9570114942528738e-05, "loss": 0.2243, "step": 8189 }, { "epoch": 28.24137931034483, "grad_norm": 2.3493218421936035, "learning_rate": 2.9569655172413793e-05, "loss": 0.2109, "step": 8190 }, { "epoch": 28.244827586206895, "grad_norm": 0.9582539200782776, "learning_rate": 2.9569195402298848e-05, "loss": 0.2149, "step": 8191 }, { "epoch": 28.248275862068965, "grad_norm": 1.4057374000549316, "learning_rate": 2.956873563218391e-05, "loss": 0.2182, "step": 8192 }, { "epoch": 28.251724137931035, "grad_norm": 1.7944772243499756, "learning_rate": 2.9568275862068966e-05, "loss": 0.2057, "step": 8193 }, { "epoch": 28.255172413793105, "grad_norm": 1.7510415315628052, "learning_rate": 2.9567816091954024e-05, "loss": 0.2413, "step": 8194 }, { "epoch": 28.25862068965517, "grad_norm": 1.0585309267044067, "learning_rate": 2.956735632183908e-05, "loss": 0.3118, "step": 8195 }, { "epoch": 28.26206896551724, "grad_norm": 0.768367350101471, "learning_rate": 2.9566896551724138e-05, "loss": 0.2677, "step": 8196 }, { "epoch": 28.26551724137931, "grad_norm": 0.49338531494140625, "learning_rate": 2.9566436781609197e-05, "loss": 0.2576, "step": 8197 }, { "epoch": 28.26896551724138, "grad_norm": 1.0966291427612305, "learning_rate": 2.9565977011494252e-05, "loss": 0.3267, "step": 8198 }, { "epoch": 28.27241379310345, "grad_norm": 0.48311135172843933, "learning_rate": 2.956551724137931e-05, "loss": 0.2236, "step": 8199 }, { "epoch": 28.275862068965516, "grad_norm": 0.586529016494751, "learning_rate": 2.956505747126437e-05, "loss": 0.2538, "step": 8200 }, { "epoch": 28.279310344827586, "grad_norm": 0.5380350947380066, "learning_rate": 2.9564597701149425e-05, "loss": 0.2358, "step": 8201 }, { "epoch": 28.282758620689656, "grad_norm": 0.6740065813064575, "learning_rate": 2.9564137931034484e-05, "loss": 0.2463, "step": 8202 }, { "epoch": 28.286206896551725, "grad_norm": 0.8715630173683167, "learning_rate": 2.956367816091954e-05, "loss": 0.2746, "step": 8203 }, { "epoch": 28.28965517241379, "grad_norm": 0.6898650527000427, "learning_rate": 2.95632183908046e-05, "loss": 0.2461, "step": 8204 }, { "epoch": 28.29310344827586, "grad_norm": 0.6051434278488159, "learning_rate": 2.9562758620689656e-05, "loss": 0.2231, "step": 8205 }, { "epoch": 28.29655172413793, "grad_norm": 0.6651365160942078, "learning_rate": 2.956229885057471e-05, "loss": 0.2094, "step": 8206 }, { "epoch": 28.3, "grad_norm": 1.391589641571045, "learning_rate": 2.956183908045977e-05, "loss": 0.2503, "step": 8207 }, { "epoch": 28.30344827586207, "grad_norm": 0.8737332224845886, "learning_rate": 2.956137931034483e-05, "loss": 0.2296, "step": 8208 }, { "epoch": 28.306896551724137, "grad_norm": 0.6046757102012634, "learning_rate": 2.9560919540229888e-05, "loss": 0.2039, "step": 8209 }, { "epoch": 28.310344827586206, "grad_norm": 1.6485631465911865, "learning_rate": 2.9560459770114943e-05, "loss": 0.2012, "step": 8210 }, { "epoch": 28.313793103448276, "grad_norm": 2.351734161376953, "learning_rate": 2.9559999999999998e-05, "loss": 0.2199, "step": 8211 }, { "epoch": 28.317241379310346, "grad_norm": 1.5246859788894653, "learning_rate": 2.955954022988506e-05, "loss": 0.2065, "step": 8212 }, { "epoch": 28.320689655172412, "grad_norm": 1.310571551322937, "learning_rate": 2.9559080459770116e-05, "loss": 0.2176, "step": 8213 }, { "epoch": 28.324137931034482, "grad_norm": 0.8062300086021423, "learning_rate": 2.955862068965517e-05, "loss": 0.2044, "step": 8214 }, { "epoch": 28.32758620689655, "grad_norm": 2.579422950744629, "learning_rate": 2.955816091954023e-05, "loss": 0.208, "step": 8215 }, { "epoch": 28.33103448275862, "grad_norm": 0.923759400844574, "learning_rate": 2.9557701149425288e-05, "loss": 0.1978, "step": 8216 }, { "epoch": 28.33448275862069, "grad_norm": 1.0305286645889282, "learning_rate": 2.9557241379310347e-05, "loss": 0.206, "step": 8217 }, { "epoch": 28.337931034482757, "grad_norm": 1.531119704246521, "learning_rate": 2.9556781609195402e-05, "loss": 0.1906, "step": 8218 }, { "epoch": 28.341379310344827, "grad_norm": 1.029227614402771, "learning_rate": 2.9556321839080457e-05, "loss": 0.2311, "step": 8219 }, { "epoch": 28.344827586206897, "grad_norm": 1.4316705465316772, "learning_rate": 2.955586206896552e-05, "loss": 0.2626, "step": 8220 }, { "epoch": 28.348275862068967, "grad_norm": 0.784063458442688, "learning_rate": 2.9555402298850575e-05, "loss": 0.2849, "step": 8221 }, { "epoch": 28.351724137931033, "grad_norm": 1.1471983194351196, "learning_rate": 2.9554942528735634e-05, "loss": 0.2434, "step": 8222 }, { "epoch": 28.355172413793102, "grad_norm": 0.6912279725074768, "learning_rate": 2.955448275862069e-05, "loss": 0.2484, "step": 8223 }, { "epoch": 28.358620689655172, "grad_norm": 0.6485075950622559, "learning_rate": 2.9554022988505748e-05, "loss": 0.2408, "step": 8224 }, { "epoch": 28.362068965517242, "grad_norm": 1.7720059156417847, "learning_rate": 2.9553563218390806e-05, "loss": 0.2598, "step": 8225 }, { "epoch": 28.36551724137931, "grad_norm": 0.9411681294441223, "learning_rate": 2.955310344827586e-05, "loss": 0.237, "step": 8226 }, { "epoch": 28.368965517241378, "grad_norm": 0.7936299443244934, "learning_rate": 2.955264367816092e-05, "loss": 0.2381, "step": 8227 }, { "epoch": 28.372413793103448, "grad_norm": 1.3931688070297241, "learning_rate": 2.955218390804598e-05, "loss": 0.2469, "step": 8228 }, { "epoch": 28.375862068965517, "grad_norm": 0.5868995785713196, "learning_rate": 2.9551724137931034e-05, "loss": 0.1948, "step": 8229 }, { "epoch": 28.379310344827587, "grad_norm": 0.6621110439300537, "learning_rate": 2.9551264367816093e-05, "loss": 0.242, "step": 8230 }, { "epoch": 28.382758620689657, "grad_norm": 1.165695309638977, "learning_rate": 2.9550804597701148e-05, "loss": 0.2521, "step": 8231 }, { "epoch": 28.386206896551723, "grad_norm": 0.8309202194213867, "learning_rate": 2.955034482758621e-05, "loss": 0.2284, "step": 8232 }, { "epoch": 28.389655172413793, "grad_norm": 1.383903980255127, "learning_rate": 2.9549885057471266e-05, "loss": 0.228, "step": 8233 }, { "epoch": 28.393103448275863, "grad_norm": 0.6596748232841492, "learning_rate": 2.954942528735632e-05, "loss": 0.2062, "step": 8234 }, { "epoch": 28.396551724137932, "grad_norm": 1.5091829299926758, "learning_rate": 2.954896551724138e-05, "loss": 0.2262, "step": 8235 }, { "epoch": 28.4, "grad_norm": 1.164567470550537, "learning_rate": 2.9548505747126438e-05, "loss": 0.2173, "step": 8236 }, { "epoch": 28.40344827586207, "grad_norm": 0.559283971786499, "learning_rate": 2.9548045977011497e-05, "loss": 0.2005, "step": 8237 }, { "epoch": 28.406896551724138, "grad_norm": 0.8490177392959595, "learning_rate": 2.9547586206896552e-05, "loss": 0.2249, "step": 8238 }, { "epoch": 28.410344827586208, "grad_norm": 0.5828081965446472, "learning_rate": 2.9547126436781607e-05, "loss": 0.2044, "step": 8239 }, { "epoch": 28.413793103448278, "grad_norm": 0.7111474871635437, "learning_rate": 2.954666666666667e-05, "loss": 0.1731, "step": 8240 }, { "epoch": 28.417241379310344, "grad_norm": 0.7549071311950684, "learning_rate": 2.9546206896551725e-05, "loss": 0.1882, "step": 8241 }, { "epoch": 28.420689655172414, "grad_norm": 0.9632821083068848, "learning_rate": 2.954574712643678e-05, "loss": 0.2069, "step": 8242 }, { "epoch": 28.424137931034483, "grad_norm": 0.8827874660491943, "learning_rate": 2.954528735632184e-05, "loss": 0.2157, "step": 8243 }, { "epoch": 28.427586206896553, "grad_norm": 1.4626511335372925, "learning_rate": 2.9544827586206897e-05, "loss": 0.241, "step": 8244 }, { "epoch": 28.43103448275862, "grad_norm": 1.740546464920044, "learning_rate": 2.9544367816091956e-05, "loss": 0.3286, "step": 8245 }, { "epoch": 28.43448275862069, "grad_norm": 0.6866878271102905, "learning_rate": 2.954390804597701e-05, "loss": 0.2657, "step": 8246 }, { "epoch": 28.43793103448276, "grad_norm": 0.9644952416419983, "learning_rate": 2.9543448275862067e-05, "loss": 0.2595, "step": 8247 }, { "epoch": 28.44137931034483, "grad_norm": 0.8404403924942017, "learning_rate": 2.954298850574713e-05, "loss": 0.226, "step": 8248 }, { "epoch": 28.444827586206898, "grad_norm": 1.4180538654327393, "learning_rate": 2.9542528735632184e-05, "loss": 0.2495, "step": 8249 }, { "epoch": 28.448275862068964, "grad_norm": 1.1659748554229736, "learning_rate": 2.9542068965517243e-05, "loss": 0.2369, "step": 8250 }, { "epoch": 28.451724137931034, "grad_norm": 0.8892903327941895, "learning_rate": 2.9541609195402298e-05, "loss": 0.2451, "step": 8251 }, { "epoch": 28.455172413793104, "grad_norm": 0.8022037148475647, "learning_rate": 2.9541149425287357e-05, "loss": 0.2791, "step": 8252 }, { "epoch": 28.458620689655174, "grad_norm": 0.6777324676513672, "learning_rate": 2.9540689655172415e-05, "loss": 0.2128, "step": 8253 }, { "epoch": 28.46206896551724, "grad_norm": 1.1573362350463867, "learning_rate": 2.954022988505747e-05, "loss": 0.2224, "step": 8254 }, { "epoch": 28.46551724137931, "grad_norm": 0.5089303255081177, "learning_rate": 2.953977011494253e-05, "loss": 0.2188, "step": 8255 }, { "epoch": 28.46896551724138, "grad_norm": 0.74896639585495, "learning_rate": 2.9539310344827588e-05, "loss": 0.2013, "step": 8256 }, { "epoch": 28.47241379310345, "grad_norm": 0.6058559417724609, "learning_rate": 2.9538850574712643e-05, "loss": 0.206, "step": 8257 }, { "epoch": 28.47586206896552, "grad_norm": 1.2205458879470825, "learning_rate": 2.9538390804597702e-05, "loss": 0.2621, "step": 8258 }, { "epoch": 28.479310344827585, "grad_norm": 0.834604799747467, "learning_rate": 2.9537931034482757e-05, "loss": 0.2387, "step": 8259 }, { "epoch": 28.482758620689655, "grad_norm": 1.2571182250976562, "learning_rate": 2.953747126436782e-05, "loss": 0.2264, "step": 8260 }, { "epoch": 28.486206896551725, "grad_norm": 2.9622206687927246, "learning_rate": 2.9537011494252875e-05, "loss": 0.2277, "step": 8261 }, { "epoch": 28.489655172413794, "grad_norm": 0.7076952457427979, "learning_rate": 2.953655172413793e-05, "loss": 0.2008, "step": 8262 }, { "epoch": 28.49310344827586, "grad_norm": 0.7946977019309998, "learning_rate": 2.953609195402299e-05, "loss": 0.2408, "step": 8263 }, { "epoch": 28.49655172413793, "grad_norm": 1.5567655563354492, "learning_rate": 2.9535632183908047e-05, "loss": 0.2137, "step": 8264 }, { "epoch": 28.5, "grad_norm": 1.2429615259170532, "learning_rate": 2.9535172413793106e-05, "loss": 0.1903, "step": 8265 }, { "epoch": 28.50344827586207, "grad_norm": 1.297597885131836, "learning_rate": 2.953471264367816e-05, "loss": 0.2016, "step": 8266 }, { "epoch": 28.50689655172414, "grad_norm": 1.5456502437591553, "learning_rate": 2.9534252873563217e-05, "loss": 0.2393, "step": 8267 }, { "epoch": 28.510344827586206, "grad_norm": 0.8416969180107117, "learning_rate": 2.953379310344828e-05, "loss": 0.2168, "step": 8268 }, { "epoch": 28.513793103448275, "grad_norm": 1.0120704174041748, "learning_rate": 2.9533333333333334e-05, "loss": 0.2349, "step": 8269 }, { "epoch": 28.517241379310345, "grad_norm": 1.447062373161316, "learning_rate": 2.9532873563218393e-05, "loss": 0.3064, "step": 8270 }, { "epoch": 28.520689655172415, "grad_norm": 0.664036750793457, "learning_rate": 2.9532413793103448e-05, "loss": 0.2468, "step": 8271 }, { "epoch": 28.52413793103448, "grad_norm": 0.5870926380157471, "learning_rate": 2.9531954022988507e-05, "loss": 0.2394, "step": 8272 }, { "epoch": 28.52758620689655, "grad_norm": 0.6911665797233582, "learning_rate": 2.9531494252873565e-05, "loss": 0.2564, "step": 8273 }, { "epoch": 28.53103448275862, "grad_norm": 1.4825079441070557, "learning_rate": 2.953103448275862e-05, "loss": 0.2524, "step": 8274 }, { "epoch": 28.53448275862069, "grad_norm": 0.6221826076507568, "learning_rate": 2.9530574712643676e-05, "loss": 0.2455, "step": 8275 }, { "epoch": 28.53793103448276, "grad_norm": 0.6723572611808777, "learning_rate": 2.9530114942528738e-05, "loss": 0.2404, "step": 8276 }, { "epoch": 28.541379310344826, "grad_norm": 0.9968149065971375, "learning_rate": 2.9529655172413793e-05, "loss": 0.2162, "step": 8277 }, { "epoch": 28.544827586206896, "grad_norm": 0.62615567445755, "learning_rate": 2.9529195402298852e-05, "loss": 0.2402, "step": 8278 }, { "epoch": 28.548275862068966, "grad_norm": 0.6703364253044128, "learning_rate": 2.9528735632183907e-05, "loss": 0.2264, "step": 8279 }, { "epoch": 28.551724137931036, "grad_norm": 0.7296134829521179, "learning_rate": 2.9528275862068966e-05, "loss": 0.2248, "step": 8280 }, { "epoch": 28.555172413793102, "grad_norm": 0.6492160558700562, "learning_rate": 2.9527816091954025e-05, "loss": 0.261, "step": 8281 }, { "epoch": 28.55862068965517, "grad_norm": 2.1196064949035645, "learning_rate": 2.952735632183908e-05, "loss": 0.2366, "step": 8282 }, { "epoch": 28.56206896551724, "grad_norm": 1.4775365591049194, "learning_rate": 2.952689655172414e-05, "loss": 0.2229, "step": 8283 }, { "epoch": 28.56551724137931, "grad_norm": 0.7003260254859924, "learning_rate": 2.9526436781609197e-05, "loss": 0.2119, "step": 8284 }, { "epoch": 28.56896551724138, "grad_norm": 1.2875837087631226, "learning_rate": 2.9525977011494253e-05, "loss": 0.2499, "step": 8285 }, { "epoch": 28.572413793103447, "grad_norm": 0.7175967693328857, "learning_rate": 2.952551724137931e-05, "loss": 0.2148, "step": 8286 }, { "epoch": 28.575862068965517, "grad_norm": 1.3217871189117432, "learning_rate": 2.9525057471264367e-05, "loss": 0.2144, "step": 8287 }, { "epoch": 28.579310344827586, "grad_norm": 0.6104159951210022, "learning_rate": 2.952459770114943e-05, "loss": 0.2091, "step": 8288 }, { "epoch": 28.582758620689656, "grad_norm": 1.702191948890686, "learning_rate": 2.9524137931034484e-05, "loss": 0.2145, "step": 8289 }, { "epoch": 28.586206896551722, "grad_norm": 1.2296067476272583, "learning_rate": 2.952367816091954e-05, "loss": 0.2106, "step": 8290 }, { "epoch": 28.589655172413792, "grad_norm": 0.891712486743927, "learning_rate": 2.9523218390804598e-05, "loss": 0.2207, "step": 8291 }, { "epoch": 28.593103448275862, "grad_norm": 0.675805389881134, "learning_rate": 2.9522758620689657e-05, "loss": 0.205, "step": 8292 }, { "epoch": 28.59655172413793, "grad_norm": 0.9432648420333862, "learning_rate": 2.9522298850574715e-05, "loss": 0.2075, "step": 8293 }, { "epoch": 28.6, "grad_norm": 0.8428996801376343, "learning_rate": 2.952183908045977e-05, "loss": 0.2201, "step": 8294 }, { "epoch": 28.603448275862068, "grad_norm": 1.11347234249115, "learning_rate": 2.9521379310344826e-05, "loss": 0.2522, "step": 8295 }, { "epoch": 28.606896551724137, "grad_norm": 0.615204393863678, "learning_rate": 2.9520919540229888e-05, "loss": 0.3169, "step": 8296 }, { "epoch": 28.610344827586207, "grad_norm": 0.8260067701339722, "learning_rate": 2.9520459770114943e-05, "loss": 0.2441, "step": 8297 }, { "epoch": 28.613793103448277, "grad_norm": 0.5394784212112427, "learning_rate": 2.9520000000000002e-05, "loss": 0.2514, "step": 8298 }, { "epoch": 28.617241379310343, "grad_norm": 1.0030016899108887, "learning_rate": 2.9519540229885057e-05, "loss": 0.2404, "step": 8299 }, { "epoch": 28.620689655172413, "grad_norm": 0.7089954614639282, "learning_rate": 2.9519080459770116e-05, "loss": 0.2477, "step": 8300 }, { "epoch": 28.624137931034483, "grad_norm": 0.7130575180053711, "learning_rate": 2.9518620689655175e-05, "loss": 0.2198, "step": 8301 }, { "epoch": 28.627586206896552, "grad_norm": 0.6604417562484741, "learning_rate": 2.951816091954023e-05, "loss": 0.2253, "step": 8302 }, { "epoch": 28.631034482758622, "grad_norm": 0.6858543157577515, "learning_rate": 2.9517701149425285e-05, "loss": 0.2167, "step": 8303 }, { "epoch": 28.63448275862069, "grad_norm": 0.5661010146141052, "learning_rate": 2.9517241379310347e-05, "loss": 0.1951, "step": 8304 }, { "epoch": 28.637931034482758, "grad_norm": 0.8052709102630615, "learning_rate": 2.9516781609195403e-05, "loss": 0.2138, "step": 8305 }, { "epoch": 28.641379310344828, "grad_norm": 0.9994296431541443, "learning_rate": 2.951632183908046e-05, "loss": 0.2397, "step": 8306 }, { "epoch": 28.644827586206898, "grad_norm": 0.7117496132850647, "learning_rate": 2.9515862068965517e-05, "loss": 0.2366, "step": 8307 }, { "epoch": 28.648275862068964, "grad_norm": 1.0421355962753296, "learning_rate": 2.9515402298850575e-05, "loss": 0.2226, "step": 8308 }, { "epoch": 28.651724137931033, "grad_norm": 1.0415948629379272, "learning_rate": 2.9514942528735634e-05, "loss": 0.2415, "step": 8309 }, { "epoch": 28.655172413793103, "grad_norm": 1.498760461807251, "learning_rate": 2.951448275862069e-05, "loss": 0.2574, "step": 8310 }, { "epoch": 28.658620689655173, "grad_norm": 0.7860624194145203, "learning_rate": 2.9514022988505748e-05, "loss": 0.2255, "step": 8311 }, { "epoch": 28.662068965517243, "grad_norm": 0.6257990598678589, "learning_rate": 2.9513563218390807e-05, "loss": 0.1883, "step": 8312 }, { "epoch": 28.66551724137931, "grad_norm": 0.6154414415359497, "learning_rate": 2.9513103448275862e-05, "loss": 0.2423, "step": 8313 }, { "epoch": 28.66896551724138, "grad_norm": 0.8519198894500732, "learning_rate": 2.951264367816092e-05, "loss": 0.1918, "step": 8314 }, { "epoch": 28.67241379310345, "grad_norm": 0.710910439491272, "learning_rate": 2.9512183908045976e-05, "loss": 0.219, "step": 8315 }, { "epoch": 28.675862068965518, "grad_norm": 1.1328473091125488, "learning_rate": 2.9511724137931038e-05, "loss": 0.2327, "step": 8316 }, { "epoch": 28.679310344827588, "grad_norm": 0.9438102841377258, "learning_rate": 2.9511264367816093e-05, "loss": 0.2193, "step": 8317 }, { "epoch": 28.682758620689654, "grad_norm": 3.735823392868042, "learning_rate": 2.951080459770115e-05, "loss": 0.233, "step": 8318 }, { "epoch": 28.686206896551724, "grad_norm": 2.1295273303985596, "learning_rate": 2.9510344827586207e-05, "loss": 0.2436, "step": 8319 }, { "epoch": 28.689655172413794, "grad_norm": 2.0664968490600586, "learning_rate": 2.9509885057471266e-05, "loss": 0.3452, "step": 8320 }, { "epoch": 28.693103448275863, "grad_norm": 0.6751145124435425, "learning_rate": 2.9509425287356325e-05, "loss": 0.3283, "step": 8321 }, { "epoch": 28.69655172413793, "grad_norm": 0.871094286441803, "learning_rate": 2.950896551724138e-05, "loss": 0.3007, "step": 8322 }, { "epoch": 28.7, "grad_norm": 1.1180874109268188, "learning_rate": 2.9508505747126435e-05, "loss": 0.2679, "step": 8323 }, { "epoch": 28.70344827586207, "grad_norm": 0.6035423874855042, "learning_rate": 2.9508045977011497e-05, "loss": 0.2369, "step": 8324 }, { "epoch": 28.70689655172414, "grad_norm": 0.7087392807006836, "learning_rate": 2.9507586206896553e-05, "loss": 0.2405, "step": 8325 }, { "epoch": 28.71034482758621, "grad_norm": 0.5647522211074829, "learning_rate": 2.950712643678161e-05, "loss": 0.2183, "step": 8326 }, { "epoch": 28.713793103448275, "grad_norm": 0.6678661108016968, "learning_rate": 2.9506666666666667e-05, "loss": 0.2497, "step": 8327 }, { "epoch": 28.717241379310344, "grad_norm": 1.9907597303390503, "learning_rate": 2.9506206896551725e-05, "loss": 0.2501, "step": 8328 }, { "epoch": 28.720689655172414, "grad_norm": 0.7164868712425232, "learning_rate": 2.9505747126436784e-05, "loss": 0.2126, "step": 8329 }, { "epoch": 28.724137931034484, "grad_norm": 1.2822980880737305, "learning_rate": 2.950528735632184e-05, "loss": 0.2265, "step": 8330 }, { "epoch": 28.72758620689655, "grad_norm": 0.6308628916740417, "learning_rate": 2.9504827586206894e-05, "loss": 0.2249, "step": 8331 }, { "epoch": 28.73103448275862, "grad_norm": 0.799608588218689, "learning_rate": 2.9504367816091957e-05, "loss": 0.2467, "step": 8332 }, { "epoch": 28.73448275862069, "grad_norm": 0.7691708207130432, "learning_rate": 2.9503908045977012e-05, "loss": 0.1935, "step": 8333 }, { "epoch": 28.73793103448276, "grad_norm": 0.6377742886543274, "learning_rate": 2.950344827586207e-05, "loss": 0.2168, "step": 8334 }, { "epoch": 28.74137931034483, "grad_norm": 0.6354420185089111, "learning_rate": 2.9502988505747126e-05, "loss": 0.2383, "step": 8335 }, { "epoch": 28.744827586206895, "grad_norm": 0.8633522391319275, "learning_rate": 2.9502528735632184e-05, "loss": 0.2223, "step": 8336 }, { "epoch": 28.748275862068965, "grad_norm": 0.7752782106399536, "learning_rate": 2.9502068965517243e-05, "loss": 0.1835, "step": 8337 }, { "epoch": 28.751724137931035, "grad_norm": 1.0082924365997314, "learning_rate": 2.95016091954023e-05, "loss": 0.2027, "step": 8338 }, { "epoch": 28.755172413793105, "grad_norm": 1.0343326330184937, "learning_rate": 2.9501149425287357e-05, "loss": 0.1825, "step": 8339 }, { "epoch": 28.75862068965517, "grad_norm": 0.9142693281173706, "learning_rate": 2.9500689655172416e-05, "loss": 0.2313, "step": 8340 }, { "epoch": 28.76206896551724, "grad_norm": 0.8922345638275146, "learning_rate": 2.950022988505747e-05, "loss": 0.2177, "step": 8341 }, { "epoch": 28.76551724137931, "grad_norm": 0.7976803779602051, "learning_rate": 2.949977011494253e-05, "loss": 0.2088, "step": 8342 }, { "epoch": 28.76896551724138, "grad_norm": 0.9245702624320984, "learning_rate": 2.9499310344827585e-05, "loss": 0.2384, "step": 8343 }, { "epoch": 28.77241379310345, "grad_norm": 0.7849964499473572, "learning_rate": 2.9498850574712647e-05, "loss": 0.2133, "step": 8344 }, { "epoch": 28.775862068965516, "grad_norm": 1.94073486328125, "learning_rate": 2.9498390804597702e-05, "loss": 0.2794, "step": 8345 }, { "epoch": 28.779310344827586, "grad_norm": 1.3153250217437744, "learning_rate": 2.9497931034482758e-05, "loss": 0.2892, "step": 8346 }, { "epoch": 28.782758620689656, "grad_norm": 1.4830870628356934, "learning_rate": 2.9497471264367816e-05, "loss": 0.245, "step": 8347 }, { "epoch": 28.786206896551725, "grad_norm": 0.9937365651130676, "learning_rate": 2.9497011494252875e-05, "loss": 0.2398, "step": 8348 }, { "epoch": 28.78965517241379, "grad_norm": 0.44604969024658203, "learning_rate": 2.9496551724137934e-05, "loss": 0.234, "step": 8349 }, { "epoch": 28.79310344827586, "grad_norm": 1.4600067138671875, "learning_rate": 2.949609195402299e-05, "loss": 0.2261, "step": 8350 }, { "epoch": 28.79655172413793, "grad_norm": 0.7319642901420593, "learning_rate": 2.9495632183908044e-05, "loss": 0.2273, "step": 8351 }, { "epoch": 28.8, "grad_norm": 0.6779738068580627, "learning_rate": 2.9495172413793106e-05, "loss": 0.2304, "step": 8352 }, { "epoch": 28.80344827586207, "grad_norm": 0.730626106262207, "learning_rate": 2.9494712643678162e-05, "loss": 0.2419, "step": 8353 }, { "epoch": 28.806896551724137, "grad_norm": 1.2741187810897827, "learning_rate": 2.949425287356322e-05, "loss": 0.206, "step": 8354 }, { "epoch": 28.810344827586206, "grad_norm": 0.6518241167068481, "learning_rate": 2.9493793103448276e-05, "loss": 0.2127, "step": 8355 }, { "epoch": 28.813793103448276, "grad_norm": 5.296920299530029, "learning_rate": 2.9493333333333334e-05, "loss": 0.2123, "step": 8356 }, { "epoch": 28.817241379310346, "grad_norm": 0.5563705563545227, "learning_rate": 2.9492873563218393e-05, "loss": 0.2066, "step": 8357 }, { "epoch": 28.820689655172412, "grad_norm": 0.7926074862480164, "learning_rate": 2.949241379310345e-05, "loss": 0.2142, "step": 8358 }, { "epoch": 28.824137931034482, "grad_norm": 1.1204516887664795, "learning_rate": 2.9491954022988507e-05, "loss": 0.2395, "step": 8359 }, { "epoch": 28.82758620689655, "grad_norm": 0.7686488032341003, "learning_rate": 2.9491494252873566e-05, "loss": 0.2311, "step": 8360 }, { "epoch": 28.83103448275862, "grad_norm": 1.1313867568969727, "learning_rate": 2.949103448275862e-05, "loss": 0.2216, "step": 8361 }, { "epoch": 28.83448275862069, "grad_norm": 0.6414045691490173, "learning_rate": 2.949057471264368e-05, "loss": 0.2139, "step": 8362 }, { "epoch": 28.837931034482757, "grad_norm": 0.9884412288665771, "learning_rate": 2.9490114942528735e-05, "loss": 0.1892, "step": 8363 }, { "epoch": 28.841379310344827, "grad_norm": 0.7406110167503357, "learning_rate": 2.9489655172413794e-05, "loss": 0.216, "step": 8364 }, { "epoch": 28.844827586206897, "grad_norm": 0.7509176135063171, "learning_rate": 2.9489195402298852e-05, "loss": 0.2251, "step": 8365 }, { "epoch": 28.848275862068967, "grad_norm": 0.6323858499526978, "learning_rate": 2.9488735632183908e-05, "loss": 0.2092, "step": 8366 }, { "epoch": 28.851724137931036, "grad_norm": 1.293182611465454, "learning_rate": 2.9488275862068966e-05, "loss": 0.2093, "step": 8367 }, { "epoch": 28.855172413793102, "grad_norm": 1.3168213367462158, "learning_rate": 2.9487816091954025e-05, "loss": 0.2015, "step": 8368 }, { "epoch": 28.858620689655172, "grad_norm": 1.1192981004714966, "learning_rate": 2.948735632183908e-05, "loss": 0.2828, "step": 8369 }, { "epoch": 28.862068965517242, "grad_norm": 0.9324017763137817, "learning_rate": 2.948689655172414e-05, "loss": 0.3076, "step": 8370 }, { "epoch": 28.86551724137931, "grad_norm": 1.0421466827392578, "learning_rate": 2.9486436781609194e-05, "loss": 0.2629, "step": 8371 }, { "epoch": 28.868965517241378, "grad_norm": 0.8723673224449158, "learning_rate": 2.9485977011494256e-05, "loss": 0.2632, "step": 8372 }, { "epoch": 28.872413793103448, "grad_norm": 0.6774497628211975, "learning_rate": 2.9485517241379312e-05, "loss": 0.2273, "step": 8373 }, { "epoch": 28.875862068965517, "grad_norm": 0.7376702427864075, "learning_rate": 2.9485057471264367e-05, "loss": 0.2507, "step": 8374 }, { "epoch": 28.879310344827587, "grad_norm": 0.7252164483070374, "learning_rate": 2.9484597701149426e-05, "loss": 0.2499, "step": 8375 }, { "epoch": 28.882758620689657, "grad_norm": 0.7534791827201843, "learning_rate": 2.9484137931034484e-05, "loss": 0.2402, "step": 8376 }, { "epoch": 28.886206896551723, "grad_norm": 0.7938997745513916, "learning_rate": 2.9483678160919543e-05, "loss": 0.2389, "step": 8377 }, { "epoch": 28.889655172413793, "grad_norm": 0.6183886528015137, "learning_rate": 2.94832183908046e-05, "loss": 0.2302, "step": 8378 }, { "epoch": 28.893103448275863, "grad_norm": 0.7597605586051941, "learning_rate": 2.9482758620689654e-05, "loss": 0.2201, "step": 8379 }, { "epoch": 28.896551724137932, "grad_norm": 0.6392091512680054, "learning_rate": 2.9482298850574716e-05, "loss": 0.2314, "step": 8380 }, { "epoch": 28.9, "grad_norm": 0.953340470790863, "learning_rate": 2.948183908045977e-05, "loss": 0.2218, "step": 8381 }, { "epoch": 28.90344827586207, "grad_norm": 2.4331202507019043, "learning_rate": 2.948137931034483e-05, "loss": 0.2531, "step": 8382 }, { "epoch": 28.906896551724138, "grad_norm": 1.9078081846237183, "learning_rate": 2.9480919540229885e-05, "loss": 0.2219, "step": 8383 }, { "epoch": 28.910344827586208, "grad_norm": 0.6311250329017639, "learning_rate": 2.9480459770114944e-05, "loss": 0.2191, "step": 8384 }, { "epoch": 28.913793103448278, "grad_norm": 0.655212938785553, "learning_rate": 2.9480000000000002e-05, "loss": 0.2099, "step": 8385 }, { "epoch": 28.917241379310344, "grad_norm": 1.059177041053772, "learning_rate": 2.9479540229885058e-05, "loss": 0.2147, "step": 8386 }, { "epoch": 28.920689655172414, "grad_norm": 0.7524669170379639, "learning_rate": 2.9479080459770116e-05, "loss": 0.2194, "step": 8387 }, { "epoch": 28.924137931034483, "grad_norm": 0.7482129335403442, "learning_rate": 2.9478620689655175e-05, "loss": 0.1983, "step": 8388 }, { "epoch": 28.927586206896553, "grad_norm": 0.9490127563476562, "learning_rate": 2.947816091954023e-05, "loss": 0.2052, "step": 8389 }, { "epoch": 28.93103448275862, "grad_norm": 1.3511041402816772, "learning_rate": 2.947770114942529e-05, "loss": 0.2199, "step": 8390 }, { "epoch": 28.93448275862069, "grad_norm": 0.8120244145393372, "learning_rate": 2.9477241379310344e-05, "loss": 0.227, "step": 8391 }, { "epoch": 28.93793103448276, "grad_norm": 0.9024966955184937, "learning_rate": 2.9476781609195403e-05, "loss": 0.2328, "step": 8392 }, { "epoch": 28.94137931034483, "grad_norm": 0.8019111752510071, "learning_rate": 2.947632183908046e-05, "loss": 0.2272, "step": 8393 }, { "epoch": 28.944827586206898, "grad_norm": 0.8610515594482422, "learning_rate": 2.9475862068965517e-05, "loss": 0.2418, "step": 8394 }, { "epoch": 28.948275862068964, "grad_norm": 1.894525170326233, "learning_rate": 2.9475402298850576e-05, "loss": 0.3109, "step": 8395 }, { "epoch": 28.951724137931034, "grad_norm": 0.6817851066589355, "learning_rate": 2.9474942528735634e-05, "loss": 0.2701, "step": 8396 }, { "epoch": 28.955172413793104, "grad_norm": 1.1640230417251587, "learning_rate": 2.947448275862069e-05, "loss": 0.2553, "step": 8397 }, { "epoch": 28.958620689655174, "grad_norm": 0.6121415495872498, "learning_rate": 2.9474022988505748e-05, "loss": 0.2455, "step": 8398 }, { "epoch": 28.96206896551724, "grad_norm": 1.5143176317214966, "learning_rate": 2.9473563218390804e-05, "loss": 0.2407, "step": 8399 }, { "epoch": 28.96551724137931, "grad_norm": 0.5894469618797302, "learning_rate": 2.9473103448275866e-05, "loss": 0.2424, "step": 8400 }, { "epoch": 28.96896551724138, "grad_norm": 1.012892484664917, "learning_rate": 2.947264367816092e-05, "loss": 0.2335, "step": 8401 }, { "epoch": 28.97241379310345, "grad_norm": 0.538620114326477, "learning_rate": 2.9472183908045976e-05, "loss": 0.2262, "step": 8402 }, { "epoch": 28.97586206896552, "grad_norm": 0.8520889282226562, "learning_rate": 2.9471724137931035e-05, "loss": 0.1913, "step": 8403 }, { "epoch": 28.979310344827585, "grad_norm": 0.6809514164924622, "learning_rate": 2.9471264367816094e-05, "loss": 0.1972, "step": 8404 }, { "epoch": 28.982758620689655, "grad_norm": 0.6516713500022888, "learning_rate": 2.9470804597701152e-05, "loss": 0.2285, "step": 8405 }, { "epoch": 28.986206896551725, "grad_norm": 0.8028709888458252, "learning_rate": 2.9470344827586208e-05, "loss": 0.2076, "step": 8406 }, { "epoch": 28.989655172413794, "grad_norm": 1.5387905836105347, "learning_rate": 2.9469885057471263e-05, "loss": 0.2219, "step": 8407 }, { "epoch": 28.99310344827586, "grad_norm": 2.758673667907715, "learning_rate": 2.9469425287356325e-05, "loss": 0.2158, "step": 8408 }, { "epoch": 28.99655172413793, "grad_norm": 0.8811305165290833, "learning_rate": 2.946896551724138e-05, "loss": 0.2204, "step": 8409 }, { "epoch": 29.0, "grad_norm": 1.2843360900878906, "learning_rate": 2.946850574712644e-05, "loss": 0.2937, "step": 8410 }, { "epoch": 29.00344827586207, "grad_norm": 0.7024034857749939, "learning_rate": 2.9468045977011494e-05, "loss": 0.243, "step": 8411 }, { "epoch": 29.00689655172414, "grad_norm": 0.44260457158088684, "learning_rate": 2.9467586206896553e-05, "loss": 0.2443, "step": 8412 }, { "epoch": 29.010344827586206, "grad_norm": 0.7628269195556641, "learning_rate": 2.946712643678161e-05, "loss": 0.2472, "step": 8413 }, { "epoch": 29.013793103448275, "grad_norm": 0.6364394426345825, "learning_rate": 2.9466666666666667e-05, "loss": 0.2254, "step": 8414 }, { "epoch": 29.017241379310345, "grad_norm": 0.5341442823410034, "learning_rate": 2.9466206896551726e-05, "loss": 0.2605, "step": 8415 }, { "epoch": 29.020689655172415, "grad_norm": 0.48759791254997253, "learning_rate": 2.9465747126436784e-05, "loss": 0.2203, "step": 8416 }, { "epoch": 29.02413793103448, "grad_norm": 0.679593563079834, "learning_rate": 2.946528735632184e-05, "loss": 0.237, "step": 8417 }, { "epoch": 29.02758620689655, "grad_norm": 0.7085295915603638, "learning_rate": 2.9464827586206898e-05, "loss": 0.2083, "step": 8418 }, { "epoch": 29.03103448275862, "grad_norm": 1.2883946895599365, "learning_rate": 2.9464367816091954e-05, "loss": 0.2271, "step": 8419 }, { "epoch": 29.03448275862069, "grad_norm": 0.8584481477737427, "learning_rate": 2.9463908045977012e-05, "loss": 0.2, "step": 8420 }, { "epoch": 29.03793103448276, "grad_norm": 1.3888745307922363, "learning_rate": 2.946344827586207e-05, "loss": 0.2148, "step": 8421 }, { "epoch": 29.041379310344826, "grad_norm": 0.6546030044555664, "learning_rate": 2.9462988505747126e-05, "loss": 0.1915, "step": 8422 }, { "epoch": 29.044827586206896, "grad_norm": 0.9133983850479126, "learning_rate": 2.9462528735632185e-05, "loss": 0.2381, "step": 8423 }, { "epoch": 29.048275862068966, "grad_norm": 0.9445765018463135, "learning_rate": 2.9462068965517244e-05, "loss": 0.1892, "step": 8424 }, { "epoch": 29.051724137931036, "grad_norm": 0.8365092873573303, "learning_rate": 2.94616091954023e-05, "loss": 0.1831, "step": 8425 }, { "epoch": 29.055172413793102, "grad_norm": 0.9562317728996277, "learning_rate": 2.9461149425287358e-05, "loss": 0.1789, "step": 8426 }, { "epoch": 29.05862068965517, "grad_norm": 1.1244508028030396, "learning_rate": 2.9460689655172413e-05, "loss": 0.193, "step": 8427 }, { "epoch": 29.06206896551724, "grad_norm": 1.0150545835494995, "learning_rate": 2.9460229885057475e-05, "loss": 0.1983, "step": 8428 }, { "epoch": 29.06551724137931, "grad_norm": 0.7360746264457703, "learning_rate": 2.945977011494253e-05, "loss": 0.2438, "step": 8429 }, { "epoch": 29.06896551724138, "grad_norm": 0.8928088545799255, "learning_rate": 2.9459310344827585e-05, "loss": 0.1927, "step": 8430 }, { "epoch": 29.072413793103447, "grad_norm": 0.8396279215812683, "learning_rate": 2.9458850574712644e-05, "loss": 0.1917, "step": 8431 }, { "epoch": 29.075862068965517, "grad_norm": 0.7031256556510925, "learning_rate": 2.9458390804597703e-05, "loss": 0.2239, "step": 8432 }, { "epoch": 29.079310344827586, "grad_norm": 3.5422980785369873, "learning_rate": 2.945793103448276e-05, "loss": 0.2132, "step": 8433 }, { "epoch": 29.082758620689656, "grad_norm": 0.9215010404586792, "learning_rate": 2.9457471264367817e-05, "loss": 0.2111, "step": 8434 }, { "epoch": 29.086206896551722, "grad_norm": 2.6358978748321533, "learning_rate": 2.9457011494252872e-05, "loss": 0.2756, "step": 8435 }, { "epoch": 29.089655172413792, "grad_norm": 0.4905362129211426, "learning_rate": 2.9456551724137934e-05, "loss": 0.2788, "step": 8436 }, { "epoch": 29.093103448275862, "grad_norm": 0.5370981693267822, "learning_rate": 2.945609195402299e-05, "loss": 0.2409, "step": 8437 }, { "epoch": 29.09655172413793, "grad_norm": 1.0018694400787354, "learning_rate": 2.9455632183908048e-05, "loss": 0.2426, "step": 8438 }, { "epoch": 29.1, "grad_norm": 0.6532219052314758, "learning_rate": 2.9455172413793103e-05, "loss": 0.2189, "step": 8439 }, { "epoch": 29.103448275862068, "grad_norm": 0.5290963053703308, "learning_rate": 2.9454712643678162e-05, "loss": 0.2117, "step": 8440 }, { "epoch": 29.106896551724137, "grad_norm": 1.2290993928909302, "learning_rate": 2.945425287356322e-05, "loss": 0.232, "step": 8441 }, { "epoch": 29.110344827586207, "grad_norm": 0.6524094939231873, "learning_rate": 2.9453793103448276e-05, "loss": 0.2169, "step": 8442 }, { "epoch": 29.113793103448277, "grad_norm": 0.6118794083595276, "learning_rate": 2.9453333333333335e-05, "loss": 0.206, "step": 8443 }, { "epoch": 29.117241379310343, "grad_norm": 0.8897814154624939, "learning_rate": 2.9452873563218393e-05, "loss": 0.2042, "step": 8444 }, { "epoch": 29.120689655172413, "grad_norm": 0.7752051949501038, "learning_rate": 2.945241379310345e-05, "loss": 0.2207, "step": 8445 }, { "epoch": 29.124137931034483, "grad_norm": 0.7334558367729187, "learning_rate": 2.9451954022988507e-05, "loss": 0.2351, "step": 8446 }, { "epoch": 29.127586206896552, "grad_norm": 0.6669809818267822, "learning_rate": 2.9451494252873563e-05, "loss": 0.2263, "step": 8447 }, { "epoch": 29.131034482758622, "grad_norm": 0.5889873504638672, "learning_rate": 2.945103448275862e-05, "loss": 0.2158, "step": 8448 }, { "epoch": 29.13448275862069, "grad_norm": 0.6094187498092651, "learning_rate": 2.945057471264368e-05, "loss": 0.1615, "step": 8449 }, { "epoch": 29.137931034482758, "grad_norm": 0.7792372107505798, "learning_rate": 2.9450114942528735e-05, "loss": 0.2267, "step": 8450 }, { "epoch": 29.141379310344828, "grad_norm": 0.9403130412101746, "learning_rate": 2.9449655172413794e-05, "loss": 0.2002, "step": 8451 }, { "epoch": 29.144827586206898, "grad_norm": 0.8165281414985657, "learning_rate": 2.944919540229885e-05, "loss": 0.1808, "step": 8452 }, { "epoch": 29.148275862068967, "grad_norm": 0.7125703692436218, "learning_rate": 2.9448735632183908e-05, "loss": 0.1891, "step": 8453 }, { "epoch": 29.151724137931033, "grad_norm": 1.1508530378341675, "learning_rate": 2.9448275862068967e-05, "loss": 0.2223, "step": 8454 }, { "epoch": 29.155172413793103, "grad_norm": 0.6624377369880676, "learning_rate": 2.9447816091954022e-05, "loss": 0.1969, "step": 8455 }, { "epoch": 29.158620689655173, "grad_norm": 0.951290488243103, "learning_rate": 2.944735632183908e-05, "loss": 0.2119, "step": 8456 }, { "epoch": 29.162068965517243, "grad_norm": 1.1884729862213135, "learning_rate": 2.944689655172414e-05, "loss": 0.1744, "step": 8457 }, { "epoch": 29.16551724137931, "grad_norm": 1.1067265272140503, "learning_rate": 2.9446436781609195e-05, "loss": 0.2016, "step": 8458 }, { "epoch": 29.16896551724138, "grad_norm": 0.8247689604759216, "learning_rate": 2.9445977011494253e-05, "loss": 0.224, "step": 8459 }, { "epoch": 29.17241379310345, "grad_norm": 1.4039545059204102, "learning_rate": 2.944551724137931e-05, "loss": 0.25, "step": 8460 }, { "epoch": 29.175862068965518, "grad_norm": 0.5820530652999878, "learning_rate": 2.944505747126437e-05, "loss": 0.2816, "step": 8461 }, { "epoch": 29.179310344827588, "grad_norm": 0.5318347811698914, "learning_rate": 2.9444597701149426e-05, "loss": 0.2218, "step": 8462 }, { "epoch": 29.182758620689654, "grad_norm": 0.47139686346054077, "learning_rate": 2.944413793103448e-05, "loss": 0.232, "step": 8463 }, { "epoch": 29.186206896551724, "grad_norm": 0.5038096904754639, "learning_rate": 2.944367816091954e-05, "loss": 0.2106, "step": 8464 }, { "epoch": 29.189655172413794, "grad_norm": 0.8219679594039917, "learning_rate": 2.94432183908046e-05, "loss": 0.222, "step": 8465 }, { "epoch": 29.193103448275863, "grad_norm": 0.5482595562934875, "learning_rate": 2.9442758620689657e-05, "loss": 0.2231, "step": 8466 }, { "epoch": 29.19655172413793, "grad_norm": 1.4113280773162842, "learning_rate": 2.9442298850574713e-05, "loss": 0.2428, "step": 8467 }, { "epoch": 29.2, "grad_norm": 0.4948676824569702, "learning_rate": 2.9441839080459768e-05, "loss": 0.2178, "step": 8468 }, { "epoch": 29.20344827586207, "grad_norm": 0.6609182953834534, "learning_rate": 2.944137931034483e-05, "loss": 0.2407, "step": 8469 }, { "epoch": 29.20689655172414, "grad_norm": 0.5814817547798157, "learning_rate": 2.9440919540229885e-05, "loss": 0.1971, "step": 8470 }, { "epoch": 29.21034482758621, "grad_norm": 1.4450095891952515, "learning_rate": 2.9440459770114944e-05, "loss": 0.1897, "step": 8471 }, { "epoch": 29.213793103448275, "grad_norm": 0.8404681086540222, "learning_rate": 2.944e-05, "loss": 0.2037, "step": 8472 }, { "epoch": 29.217241379310344, "grad_norm": 0.4260736107826233, "learning_rate": 2.9439540229885058e-05, "loss": 0.1862, "step": 8473 }, { "epoch": 29.220689655172414, "grad_norm": 1.4235868453979492, "learning_rate": 2.9439080459770117e-05, "loss": 0.2417, "step": 8474 }, { "epoch": 29.224137931034484, "grad_norm": 0.7418224811553955, "learning_rate": 2.9438620689655172e-05, "loss": 0.2303, "step": 8475 }, { "epoch": 29.22758620689655, "grad_norm": 0.7645103931427002, "learning_rate": 2.943816091954023e-05, "loss": 0.2067, "step": 8476 }, { "epoch": 29.23103448275862, "grad_norm": 0.7388969659805298, "learning_rate": 2.943770114942529e-05, "loss": 0.1894, "step": 8477 }, { "epoch": 29.23448275862069, "grad_norm": 1.2340644598007202, "learning_rate": 2.9437241379310345e-05, "loss": 0.2149, "step": 8478 }, { "epoch": 29.23793103448276, "grad_norm": 1.0713930130004883, "learning_rate": 2.9436781609195403e-05, "loss": 0.2075, "step": 8479 }, { "epoch": 29.24137931034483, "grad_norm": 0.772520124912262, "learning_rate": 2.943632183908046e-05, "loss": 0.2074, "step": 8480 }, { "epoch": 29.244827586206895, "grad_norm": 1.028178334236145, "learning_rate": 2.9435862068965517e-05, "loss": 0.2073, "step": 8481 }, { "epoch": 29.248275862068965, "grad_norm": 1.1500747203826904, "learning_rate": 2.9435402298850576e-05, "loss": 0.1841, "step": 8482 }, { "epoch": 29.251724137931035, "grad_norm": 0.7738918662071228, "learning_rate": 2.943494252873563e-05, "loss": 0.1764, "step": 8483 }, { "epoch": 29.255172413793105, "grad_norm": 0.9483346343040466, "learning_rate": 2.943448275862069e-05, "loss": 0.186, "step": 8484 }, { "epoch": 29.25862068965517, "grad_norm": 1.5272263288497925, "learning_rate": 2.943402298850575e-05, "loss": 0.3362, "step": 8485 }, { "epoch": 29.26206896551724, "grad_norm": 0.617757260799408, "learning_rate": 2.9433563218390804e-05, "loss": 0.2805, "step": 8486 }, { "epoch": 29.26551724137931, "grad_norm": 0.9936351180076599, "learning_rate": 2.9433103448275863e-05, "loss": 0.2589, "step": 8487 }, { "epoch": 29.26896551724138, "grad_norm": 0.5394191741943359, "learning_rate": 2.9432643678160918e-05, "loss": 0.2344, "step": 8488 }, { "epoch": 29.27241379310345, "grad_norm": 0.9778397083282471, "learning_rate": 2.943218390804598e-05, "loss": 0.2547, "step": 8489 }, { "epoch": 29.275862068965516, "grad_norm": 0.775196373462677, "learning_rate": 2.9431724137931035e-05, "loss": 0.246, "step": 8490 }, { "epoch": 29.279310344827586, "grad_norm": 0.8126369714736938, "learning_rate": 2.943126436781609e-05, "loss": 0.2166, "step": 8491 }, { "epoch": 29.282758620689656, "grad_norm": 0.977419912815094, "learning_rate": 2.943080459770115e-05, "loss": 0.2519, "step": 8492 }, { "epoch": 29.286206896551725, "grad_norm": 0.9167807102203369, "learning_rate": 2.9430344827586208e-05, "loss": 0.2003, "step": 8493 }, { "epoch": 29.28965517241379, "grad_norm": 0.6145679354667664, "learning_rate": 2.9429885057471267e-05, "loss": 0.2008, "step": 8494 }, { "epoch": 29.29310344827586, "grad_norm": 1.324931263923645, "learning_rate": 2.9429425287356322e-05, "loss": 0.2353, "step": 8495 }, { "epoch": 29.29655172413793, "grad_norm": 1.6240581274032593, "learning_rate": 2.9428965517241377e-05, "loss": 0.2377, "step": 8496 }, { "epoch": 29.3, "grad_norm": 0.6399808526039124, "learning_rate": 2.942850574712644e-05, "loss": 0.1934, "step": 8497 }, { "epoch": 29.30344827586207, "grad_norm": 0.5834258794784546, "learning_rate": 2.9428045977011495e-05, "loss": 0.227, "step": 8498 }, { "epoch": 29.306896551724137, "grad_norm": 0.8762993812561035, "learning_rate": 2.9427586206896553e-05, "loss": 0.2019, "step": 8499 }, { "epoch": 29.310344827586206, "grad_norm": 0.7464197874069214, "learning_rate": 2.942712643678161e-05, "loss": 0.2263, "step": 8500 }, { "epoch": 29.313793103448276, "grad_norm": 0.6835722327232361, "learning_rate": 2.9426666666666667e-05, "loss": 0.221, "step": 8501 }, { "epoch": 29.317241379310346, "grad_norm": 0.7295337319374084, "learning_rate": 2.9426206896551726e-05, "loss": 0.1985, "step": 8502 }, { "epoch": 29.320689655172412, "grad_norm": 0.7071560621261597, "learning_rate": 2.942574712643678e-05, "loss": 0.2148, "step": 8503 }, { "epoch": 29.324137931034482, "grad_norm": 0.8622764945030212, "learning_rate": 2.942528735632184e-05, "loss": 0.2055, "step": 8504 }, { "epoch": 29.32758620689655, "grad_norm": 1.672597050666809, "learning_rate": 2.94248275862069e-05, "loss": 0.2042, "step": 8505 }, { "epoch": 29.33103448275862, "grad_norm": 0.8662086129188538, "learning_rate": 2.9424367816091954e-05, "loss": 0.2048, "step": 8506 }, { "epoch": 29.33448275862069, "grad_norm": 0.8094044327735901, "learning_rate": 2.9423908045977013e-05, "loss": 0.2132, "step": 8507 }, { "epoch": 29.337931034482757, "grad_norm": 0.7870288491249084, "learning_rate": 2.9423448275862068e-05, "loss": 0.223, "step": 8508 }, { "epoch": 29.341379310344827, "grad_norm": 0.7968257069587708, "learning_rate": 2.9422988505747127e-05, "loss": 0.2155, "step": 8509 }, { "epoch": 29.344827586206897, "grad_norm": 2.0013058185577393, "learning_rate": 2.9422528735632185e-05, "loss": 0.2834, "step": 8510 }, { "epoch": 29.348275862068967, "grad_norm": 0.6874477863311768, "learning_rate": 2.942206896551724e-05, "loss": 0.2466, "step": 8511 }, { "epoch": 29.351724137931033, "grad_norm": 0.603545606136322, "learning_rate": 2.94216091954023e-05, "loss": 0.266, "step": 8512 }, { "epoch": 29.355172413793102, "grad_norm": 0.9929205775260925, "learning_rate": 2.9421149425287358e-05, "loss": 0.2503, "step": 8513 }, { "epoch": 29.358620689655172, "grad_norm": 0.5961554050445557, "learning_rate": 2.9420689655172413e-05, "loss": 0.253, "step": 8514 }, { "epoch": 29.362068965517242, "grad_norm": 0.7251795530319214, "learning_rate": 2.9420229885057472e-05, "loss": 0.2231, "step": 8515 }, { "epoch": 29.36551724137931, "grad_norm": 1.0719401836395264, "learning_rate": 2.9419770114942527e-05, "loss": 0.2235, "step": 8516 }, { "epoch": 29.368965517241378, "grad_norm": 0.9208813905715942, "learning_rate": 2.941931034482759e-05, "loss": 0.2203, "step": 8517 }, { "epoch": 29.372413793103448, "grad_norm": 1.096336007118225, "learning_rate": 2.9418850574712645e-05, "loss": 0.2523, "step": 8518 }, { "epoch": 29.375862068965517, "grad_norm": 1.9138771295547485, "learning_rate": 2.94183908045977e-05, "loss": 0.2144, "step": 8519 }, { "epoch": 29.379310344827587, "grad_norm": 1.3280202150344849, "learning_rate": 2.941793103448276e-05, "loss": 0.1923, "step": 8520 }, { "epoch": 29.382758620689657, "grad_norm": 3.172459602355957, "learning_rate": 2.9417471264367817e-05, "loss": 0.2356, "step": 8521 }, { "epoch": 29.386206896551723, "grad_norm": 1.102019190788269, "learning_rate": 2.9417011494252876e-05, "loss": 0.2201, "step": 8522 }, { "epoch": 29.389655172413793, "grad_norm": 1.620155930519104, "learning_rate": 2.941655172413793e-05, "loss": 0.1877, "step": 8523 }, { "epoch": 29.393103448275863, "grad_norm": 0.5938528180122375, "learning_rate": 2.9416091954022986e-05, "loss": 0.2175, "step": 8524 }, { "epoch": 29.396551724137932, "grad_norm": 1.027496099472046, "learning_rate": 2.941563218390805e-05, "loss": 0.2238, "step": 8525 }, { "epoch": 29.4, "grad_norm": 0.8008903861045837, "learning_rate": 2.9415172413793104e-05, "loss": 0.2057, "step": 8526 }, { "epoch": 29.40344827586207, "grad_norm": 3.6104743480682373, "learning_rate": 2.9414712643678163e-05, "loss": 0.2149, "step": 8527 }, { "epoch": 29.406896551724138, "grad_norm": 1.0886818170547485, "learning_rate": 2.9414252873563218e-05, "loss": 0.2003, "step": 8528 }, { "epoch": 29.410344827586208, "grad_norm": 0.7820970416069031, "learning_rate": 2.9413793103448277e-05, "loss": 0.189, "step": 8529 }, { "epoch": 29.413793103448278, "grad_norm": 0.9718925356864929, "learning_rate": 2.9413333333333335e-05, "loss": 0.2239, "step": 8530 }, { "epoch": 29.417241379310344, "grad_norm": 1.2551460266113281, "learning_rate": 2.941287356321839e-05, "loss": 0.2257, "step": 8531 }, { "epoch": 29.420689655172414, "grad_norm": 1.0808199644088745, "learning_rate": 2.941241379310345e-05, "loss": 0.1852, "step": 8532 }, { "epoch": 29.424137931034483, "grad_norm": 0.9568034410476685, "learning_rate": 2.9411954022988508e-05, "loss": 0.209, "step": 8533 }, { "epoch": 29.427586206896553, "grad_norm": 1.0231153964996338, "learning_rate": 2.9411494252873563e-05, "loss": 0.2553, "step": 8534 }, { "epoch": 29.43103448275862, "grad_norm": 1.5513527393341064, "learning_rate": 2.9411034482758622e-05, "loss": 0.2502, "step": 8535 }, { "epoch": 29.43448275862069, "grad_norm": 1.277540922164917, "learning_rate": 2.9410574712643677e-05, "loss": 0.3074, "step": 8536 }, { "epoch": 29.43793103448276, "grad_norm": 0.6253504157066345, "learning_rate": 2.9410114942528736e-05, "loss": 0.2486, "step": 8537 }, { "epoch": 29.44137931034483, "grad_norm": 0.8296213746070862, "learning_rate": 2.9409655172413794e-05, "loss": 0.2365, "step": 8538 }, { "epoch": 29.444827586206898, "grad_norm": 0.5492059588432312, "learning_rate": 2.940919540229885e-05, "loss": 0.2253, "step": 8539 }, { "epoch": 29.448275862068964, "grad_norm": 0.5913786888122559, "learning_rate": 2.940873563218391e-05, "loss": 0.2296, "step": 8540 }, { "epoch": 29.451724137931034, "grad_norm": 0.6980318427085876, "learning_rate": 2.9408275862068967e-05, "loss": 0.2272, "step": 8541 }, { "epoch": 29.455172413793104, "grad_norm": 0.6542199850082397, "learning_rate": 2.9407816091954022e-05, "loss": 0.2404, "step": 8542 }, { "epoch": 29.458620689655174, "grad_norm": 0.7386915683746338, "learning_rate": 2.940735632183908e-05, "loss": 0.2273, "step": 8543 }, { "epoch": 29.46206896551724, "grad_norm": 0.6400183439254761, "learning_rate": 2.9406896551724136e-05, "loss": 0.2317, "step": 8544 }, { "epoch": 29.46551724137931, "grad_norm": 0.7316537499427795, "learning_rate": 2.94064367816092e-05, "loss": 0.2141, "step": 8545 }, { "epoch": 29.46896551724138, "grad_norm": 0.9107458591461182, "learning_rate": 2.9405977011494254e-05, "loss": 0.2105, "step": 8546 }, { "epoch": 29.47241379310345, "grad_norm": 1.0408706665039062, "learning_rate": 2.940551724137931e-05, "loss": 0.1973, "step": 8547 }, { "epoch": 29.47586206896552, "grad_norm": 1.4143403768539429, "learning_rate": 2.9405057471264368e-05, "loss": 0.2073, "step": 8548 }, { "epoch": 29.479310344827585, "grad_norm": 0.5904884338378906, "learning_rate": 2.9404597701149426e-05, "loss": 0.2103, "step": 8549 }, { "epoch": 29.482758620689655, "grad_norm": 1.0549463033676147, "learning_rate": 2.9404137931034485e-05, "loss": 0.2057, "step": 8550 }, { "epoch": 29.486206896551725, "grad_norm": 0.6379077434539795, "learning_rate": 2.940367816091954e-05, "loss": 0.1924, "step": 8551 }, { "epoch": 29.489655172413794, "grad_norm": 0.9892157316207886, "learning_rate": 2.9403218390804596e-05, "loss": 0.2258, "step": 8552 }, { "epoch": 29.49310344827586, "grad_norm": 2.8988497257232666, "learning_rate": 2.9402758620689658e-05, "loss": 0.2064, "step": 8553 }, { "epoch": 29.49655172413793, "grad_norm": 0.5257912874221802, "learning_rate": 2.9402298850574713e-05, "loss": 0.2092, "step": 8554 }, { "epoch": 29.5, "grad_norm": 0.774492084980011, "learning_rate": 2.9401839080459772e-05, "loss": 0.1864, "step": 8555 }, { "epoch": 29.50344827586207, "grad_norm": 1.321515679359436, "learning_rate": 2.9401379310344827e-05, "loss": 0.199, "step": 8556 }, { "epoch": 29.50689655172414, "grad_norm": 1.0267571210861206, "learning_rate": 2.9400919540229886e-05, "loss": 0.2158, "step": 8557 }, { "epoch": 29.510344827586206, "grad_norm": 0.8924285173416138, "learning_rate": 2.9400459770114944e-05, "loss": 0.2213, "step": 8558 }, { "epoch": 29.513793103448275, "grad_norm": 1.1050852537155151, "learning_rate": 2.94e-05, "loss": 0.2237, "step": 8559 }, { "epoch": 29.517241379310345, "grad_norm": 1.4818718433380127, "learning_rate": 2.939954022988506e-05, "loss": 0.2827, "step": 8560 }, { "epoch": 29.520689655172415, "grad_norm": 0.6923932433128357, "learning_rate": 2.9399080459770117e-05, "loss": 0.2806, "step": 8561 }, { "epoch": 29.52413793103448, "grad_norm": 0.5978209376335144, "learning_rate": 2.9398620689655172e-05, "loss": 0.2407, "step": 8562 }, { "epoch": 29.52758620689655, "grad_norm": 0.7237303256988525, "learning_rate": 2.939816091954023e-05, "loss": 0.2508, "step": 8563 }, { "epoch": 29.53103448275862, "grad_norm": 0.713799238204956, "learning_rate": 2.9397701149425286e-05, "loss": 0.2537, "step": 8564 }, { "epoch": 29.53448275862069, "grad_norm": 1.8056926727294922, "learning_rate": 2.9397241379310345e-05, "loss": 0.2439, "step": 8565 }, { "epoch": 29.53793103448276, "grad_norm": 1.7837307453155518, "learning_rate": 2.9396781609195404e-05, "loss": 0.2474, "step": 8566 }, { "epoch": 29.541379310344826, "grad_norm": 0.5693389773368835, "learning_rate": 2.939632183908046e-05, "loss": 0.2409, "step": 8567 }, { "epoch": 29.544827586206896, "grad_norm": 1.0106958150863647, "learning_rate": 2.9395862068965518e-05, "loss": 0.2184, "step": 8568 }, { "epoch": 29.548275862068966, "grad_norm": 0.5792275071144104, "learning_rate": 2.9395402298850576e-05, "loss": 0.2127, "step": 8569 }, { "epoch": 29.551724137931036, "grad_norm": 0.745433509349823, "learning_rate": 2.939494252873563e-05, "loss": 0.2235, "step": 8570 }, { "epoch": 29.555172413793102, "grad_norm": 0.7427014708518982, "learning_rate": 2.939448275862069e-05, "loss": 0.1992, "step": 8571 }, { "epoch": 29.55862068965517, "grad_norm": 0.7384800910949707, "learning_rate": 2.9394022988505746e-05, "loss": 0.2366, "step": 8572 }, { "epoch": 29.56206896551724, "grad_norm": 0.7956397533416748, "learning_rate": 2.9393563218390808e-05, "loss": 0.2023, "step": 8573 }, { "epoch": 29.56551724137931, "grad_norm": 0.5037432909011841, "learning_rate": 2.9393103448275863e-05, "loss": 0.2142, "step": 8574 }, { "epoch": 29.56896551724138, "grad_norm": 0.612984299659729, "learning_rate": 2.939264367816092e-05, "loss": 0.208, "step": 8575 }, { "epoch": 29.572413793103447, "grad_norm": 0.641606330871582, "learning_rate": 2.9392183908045977e-05, "loss": 0.2126, "step": 8576 }, { "epoch": 29.575862068965517, "grad_norm": 0.688430666923523, "learning_rate": 2.9391724137931036e-05, "loss": 0.2217, "step": 8577 }, { "epoch": 29.579310344827586, "grad_norm": 0.8326401114463806, "learning_rate": 2.9391264367816094e-05, "loss": 0.1809, "step": 8578 }, { "epoch": 29.582758620689656, "grad_norm": 0.8822258710861206, "learning_rate": 2.939080459770115e-05, "loss": 0.1793, "step": 8579 }, { "epoch": 29.586206896551722, "grad_norm": 1.5786772966384888, "learning_rate": 2.9390344827586205e-05, "loss": 0.1689, "step": 8580 }, { "epoch": 29.589655172413792, "grad_norm": 0.6473851799964905, "learning_rate": 2.9389885057471267e-05, "loss": 0.2253, "step": 8581 }, { "epoch": 29.593103448275862, "grad_norm": 1.0582424402236938, "learning_rate": 2.9389425287356322e-05, "loss": 0.1915, "step": 8582 }, { "epoch": 29.59655172413793, "grad_norm": 0.9648132920265198, "learning_rate": 2.938896551724138e-05, "loss": 0.2135, "step": 8583 }, { "epoch": 29.6, "grad_norm": 0.8603467345237732, "learning_rate": 2.9388505747126436e-05, "loss": 0.2251, "step": 8584 }, { "epoch": 29.603448275862068, "grad_norm": 1.3342636823654175, "learning_rate": 2.9388045977011495e-05, "loss": 0.2838, "step": 8585 }, { "epoch": 29.606896551724137, "grad_norm": 0.5653431415557861, "learning_rate": 2.9387586206896554e-05, "loss": 0.2756, "step": 8586 }, { "epoch": 29.610344827586207, "grad_norm": 0.6468084454536438, "learning_rate": 2.938712643678161e-05, "loss": 0.2336, "step": 8587 }, { "epoch": 29.613793103448277, "grad_norm": 0.4202195703983307, "learning_rate": 2.9386666666666668e-05, "loss": 0.2395, "step": 8588 }, { "epoch": 29.617241379310343, "grad_norm": 0.5942292809486389, "learning_rate": 2.9386206896551726e-05, "loss": 0.2562, "step": 8589 }, { "epoch": 29.620689655172413, "grad_norm": 1.1588554382324219, "learning_rate": 2.938574712643678e-05, "loss": 0.2151, "step": 8590 }, { "epoch": 29.624137931034483, "grad_norm": 0.6908065676689148, "learning_rate": 2.938528735632184e-05, "loss": 0.2651, "step": 8591 }, { "epoch": 29.627586206896552, "grad_norm": 0.472477525472641, "learning_rate": 2.9384827586206896e-05, "loss": 0.2181, "step": 8592 }, { "epoch": 29.631034482758622, "grad_norm": 1.0093729496002197, "learning_rate": 2.9384367816091958e-05, "loss": 0.2293, "step": 8593 }, { "epoch": 29.63448275862069, "grad_norm": 1.5410795211791992, "learning_rate": 2.9383908045977013e-05, "loss": 0.2152, "step": 8594 }, { "epoch": 29.637931034482758, "grad_norm": 0.7550352811813354, "learning_rate": 2.9383448275862068e-05, "loss": 0.2263, "step": 8595 }, { "epoch": 29.641379310344828, "grad_norm": 0.5717222690582275, "learning_rate": 2.9382988505747127e-05, "loss": 0.2101, "step": 8596 }, { "epoch": 29.644827586206898, "grad_norm": 0.754328727722168, "learning_rate": 2.9382528735632186e-05, "loss": 0.2146, "step": 8597 }, { "epoch": 29.648275862068964, "grad_norm": 0.5177574157714844, "learning_rate": 2.938206896551724e-05, "loss": 0.2013, "step": 8598 }, { "epoch": 29.651724137931033, "grad_norm": 0.6980205774307251, "learning_rate": 2.93816091954023e-05, "loss": 0.2156, "step": 8599 }, { "epoch": 29.655172413793103, "grad_norm": 0.4904625415802002, "learning_rate": 2.9381149425287355e-05, "loss": 0.2025, "step": 8600 }, { "epoch": 29.658620689655173, "grad_norm": 0.6592461466789246, "learning_rate": 2.9380689655172417e-05, "loss": 0.2059, "step": 8601 }, { "epoch": 29.662068965517243, "grad_norm": 0.6597849130630493, "learning_rate": 2.9380229885057472e-05, "loss": 0.2216, "step": 8602 }, { "epoch": 29.66551724137931, "grad_norm": 0.8609221577644348, "learning_rate": 2.9379770114942528e-05, "loss": 0.2179, "step": 8603 }, { "epoch": 29.66896551724138, "grad_norm": 0.7515446543693542, "learning_rate": 2.9379310344827586e-05, "loss": 0.203, "step": 8604 }, { "epoch": 29.67241379310345, "grad_norm": 1.0956437587738037, "learning_rate": 2.9378850574712645e-05, "loss": 0.2066, "step": 8605 }, { "epoch": 29.675862068965518, "grad_norm": 0.7473214864730835, "learning_rate": 2.9378390804597704e-05, "loss": 0.2177, "step": 8606 }, { "epoch": 29.679310344827588, "grad_norm": 1.0701605081558228, "learning_rate": 2.937793103448276e-05, "loss": 0.1809, "step": 8607 }, { "epoch": 29.682758620689654, "grad_norm": 1.0599894523620605, "learning_rate": 2.9377471264367814e-05, "loss": 0.2166, "step": 8608 }, { "epoch": 29.686206896551724, "grad_norm": 1.5910524129867554, "learning_rate": 2.9377011494252876e-05, "loss": 0.2063, "step": 8609 }, { "epoch": 29.689655172413794, "grad_norm": 1.7873320579528809, "learning_rate": 2.937655172413793e-05, "loss": 0.2597, "step": 8610 }, { "epoch": 29.693103448275863, "grad_norm": 0.5286498069763184, "learning_rate": 2.937609195402299e-05, "loss": 0.2835, "step": 8611 }, { "epoch": 29.69655172413793, "grad_norm": 0.8307579755783081, "learning_rate": 2.9375632183908046e-05, "loss": 0.2206, "step": 8612 }, { "epoch": 29.7, "grad_norm": 0.595156729221344, "learning_rate": 2.9375172413793104e-05, "loss": 0.2333, "step": 8613 }, { "epoch": 29.70344827586207, "grad_norm": 1.1119102239608765, "learning_rate": 2.9374712643678163e-05, "loss": 0.2473, "step": 8614 }, { "epoch": 29.70689655172414, "grad_norm": 0.6027334928512573, "learning_rate": 2.9374252873563218e-05, "loss": 0.2458, "step": 8615 }, { "epoch": 29.71034482758621, "grad_norm": 1.1464070081710815, "learning_rate": 2.9373793103448277e-05, "loss": 0.2167, "step": 8616 }, { "epoch": 29.713793103448275, "grad_norm": 0.9474260807037354, "learning_rate": 2.9373333333333336e-05, "loss": 0.2494, "step": 8617 }, { "epoch": 29.717241379310344, "grad_norm": 0.5701731443405151, "learning_rate": 2.937287356321839e-05, "loss": 0.2256, "step": 8618 }, { "epoch": 29.720689655172414, "grad_norm": 1.6176795959472656, "learning_rate": 2.937241379310345e-05, "loss": 0.2046, "step": 8619 }, { "epoch": 29.724137931034484, "grad_norm": 1.990821123123169, "learning_rate": 2.9371954022988505e-05, "loss": 0.2453, "step": 8620 }, { "epoch": 29.72758620689655, "grad_norm": 0.958305835723877, "learning_rate": 2.9371494252873567e-05, "loss": 0.2265, "step": 8621 }, { "epoch": 29.73103448275862, "grad_norm": 0.729954183101654, "learning_rate": 2.9371034482758622e-05, "loss": 0.2453, "step": 8622 }, { "epoch": 29.73448275862069, "grad_norm": 0.8314918279647827, "learning_rate": 2.9370574712643678e-05, "loss": 0.2098, "step": 8623 }, { "epoch": 29.73793103448276, "grad_norm": 0.7068613767623901, "learning_rate": 2.9370114942528736e-05, "loss": 0.2064, "step": 8624 }, { "epoch": 29.74137931034483, "grad_norm": 0.714805543422699, "learning_rate": 2.9369655172413795e-05, "loss": 0.2238, "step": 8625 }, { "epoch": 29.744827586206895, "grad_norm": 1.0022026300430298, "learning_rate": 2.936919540229885e-05, "loss": 0.2146, "step": 8626 }, { "epoch": 29.748275862068965, "grad_norm": 0.6049807071685791, "learning_rate": 2.936873563218391e-05, "loss": 0.2066, "step": 8627 }, { "epoch": 29.751724137931035, "grad_norm": 1.6996084451675415, "learning_rate": 2.9368275862068964e-05, "loss": 0.2168, "step": 8628 }, { "epoch": 29.755172413793105, "grad_norm": 0.834475576877594, "learning_rate": 2.9367816091954026e-05, "loss": 0.2011, "step": 8629 }, { "epoch": 29.75862068965517, "grad_norm": 0.7317615747451782, "learning_rate": 2.936735632183908e-05, "loss": 0.1962, "step": 8630 }, { "epoch": 29.76206896551724, "grad_norm": 1.1257619857788086, "learning_rate": 2.9366896551724137e-05, "loss": 0.2186, "step": 8631 }, { "epoch": 29.76551724137931, "grad_norm": 1.2593938112258911, "learning_rate": 2.9366436781609195e-05, "loss": 0.1842, "step": 8632 }, { "epoch": 29.76896551724138, "grad_norm": 0.9015754461288452, "learning_rate": 2.9365977011494254e-05, "loss": 0.1943, "step": 8633 }, { "epoch": 29.77241379310345, "grad_norm": 1.3874237537384033, "learning_rate": 2.9365517241379313e-05, "loss": 0.2782, "step": 8634 }, { "epoch": 29.775862068965516, "grad_norm": 2.187631130218506, "learning_rate": 2.9365057471264368e-05, "loss": 0.2804, "step": 8635 }, { "epoch": 29.779310344827586, "grad_norm": 0.8842916488647461, "learning_rate": 2.9364597701149423e-05, "loss": 0.3102, "step": 8636 }, { "epoch": 29.782758620689656, "grad_norm": 0.6075575351715088, "learning_rate": 2.9364137931034486e-05, "loss": 0.2676, "step": 8637 }, { "epoch": 29.786206896551725, "grad_norm": 0.5922269821166992, "learning_rate": 2.936367816091954e-05, "loss": 0.2444, "step": 8638 }, { "epoch": 29.78965517241379, "grad_norm": 0.7121717929840088, "learning_rate": 2.93632183908046e-05, "loss": 0.257, "step": 8639 }, { "epoch": 29.79310344827586, "grad_norm": 1.1534892320632935, "learning_rate": 2.9362758620689655e-05, "loss": 0.2388, "step": 8640 }, { "epoch": 29.79655172413793, "grad_norm": 0.6111218333244324, "learning_rate": 2.9362298850574713e-05, "loss": 0.2137, "step": 8641 }, { "epoch": 29.8, "grad_norm": 0.5231217741966248, "learning_rate": 2.9361839080459772e-05, "loss": 0.2231, "step": 8642 }, { "epoch": 29.80344827586207, "grad_norm": 0.751531183719635, "learning_rate": 2.9361379310344827e-05, "loss": 0.2243, "step": 8643 }, { "epoch": 29.806896551724137, "grad_norm": 0.7200953960418701, "learning_rate": 2.9360919540229886e-05, "loss": 0.2327, "step": 8644 }, { "epoch": 29.810344827586206, "grad_norm": 1.0531408786773682, "learning_rate": 2.9360459770114945e-05, "loss": 0.1991, "step": 8645 }, { "epoch": 29.813793103448276, "grad_norm": 0.5214024782180786, "learning_rate": 2.936e-05, "loss": 0.2009, "step": 8646 }, { "epoch": 29.817241379310346, "grad_norm": 0.6239557862281799, "learning_rate": 2.935954022988506e-05, "loss": 0.2132, "step": 8647 }, { "epoch": 29.820689655172412, "grad_norm": 0.7098475694656372, "learning_rate": 2.9359080459770114e-05, "loss": 0.2426, "step": 8648 }, { "epoch": 29.824137931034482, "grad_norm": 1.8348937034606934, "learning_rate": 2.9358620689655176e-05, "loss": 0.2019, "step": 8649 }, { "epoch": 29.82758620689655, "grad_norm": 0.6597878336906433, "learning_rate": 2.935816091954023e-05, "loss": 0.2024, "step": 8650 }, { "epoch": 29.83103448275862, "grad_norm": 0.4979099929332733, "learning_rate": 2.9357701149425287e-05, "loss": 0.1979, "step": 8651 }, { "epoch": 29.83448275862069, "grad_norm": 0.621502161026001, "learning_rate": 2.9357241379310345e-05, "loss": 0.206, "step": 8652 }, { "epoch": 29.837931034482757, "grad_norm": 0.78814697265625, "learning_rate": 2.9356781609195404e-05, "loss": 0.1933, "step": 8653 }, { "epoch": 29.841379310344827, "grad_norm": 1.3966001272201538, "learning_rate": 2.935632183908046e-05, "loss": 0.2058, "step": 8654 }, { "epoch": 29.844827586206897, "grad_norm": 0.8921729922294617, "learning_rate": 2.9355862068965518e-05, "loss": 0.2053, "step": 8655 }, { "epoch": 29.848275862068967, "grad_norm": 1.0679399967193604, "learning_rate": 2.9355402298850573e-05, "loss": 0.1887, "step": 8656 }, { "epoch": 29.851724137931036, "grad_norm": 1.1467162370681763, "learning_rate": 2.9354942528735635e-05, "loss": 0.2031, "step": 8657 }, { "epoch": 29.855172413793102, "grad_norm": 1.1041758060455322, "learning_rate": 2.935448275862069e-05, "loss": 0.1995, "step": 8658 }, { "epoch": 29.858620689655172, "grad_norm": 1.287103295326233, "learning_rate": 2.9354022988505746e-05, "loss": 0.2321, "step": 8659 }, { "epoch": 29.862068965517242, "grad_norm": 1.301393747329712, "learning_rate": 2.9353563218390805e-05, "loss": 0.2639, "step": 8660 }, { "epoch": 29.86551724137931, "grad_norm": 0.70135498046875, "learning_rate": 2.9353103448275863e-05, "loss": 0.2668, "step": 8661 }, { "epoch": 29.868965517241378, "grad_norm": 0.7361525297164917, "learning_rate": 2.9352643678160922e-05, "loss": 0.2553, "step": 8662 }, { "epoch": 29.872413793103448, "grad_norm": 1.6579993963241577, "learning_rate": 2.9352183908045977e-05, "loss": 0.2499, "step": 8663 }, { "epoch": 29.875862068965517, "grad_norm": 0.7581803202629089, "learning_rate": 2.9351724137931033e-05, "loss": 0.2282, "step": 8664 }, { "epoch": 29.879310344827587, "grad_norm": 0.8727822303771973, "learning_rate": 2.9351264367816095e-05, "loss": 0.2249, "step": 8665 }, { "epoch": 29.882758620689657, "grad_norm": 0.5612066388130188, "learning_rate": 2.935080459770115e-05, "loss": 0.2151, "step": 8666 }, { "epoch": 29.886206896551723, "grad_norm": 1.196216344833374, "learning_rate": 2.935034482758621e-05, "loss": 0.214, "step": 8667 }, { "epoch": 29.889655172413793, "grad_norm": 0.8226383924484253, "learning_rate": 2.9349885057471264e-05, "loss": 0.2257, "step": 8668 }, { "epoch": 29.893103448275863, "grad_norm": 0.6665430665016174, "learning_rate": 2.9349425287356323e-05, "loss": 0.1915, "step": 8669 }, { "epoch": 29.896551724137932, "grad_norm": 0.7111658453941345, "learning_rate": 2.934896551724138e-05, "loss": 0.2241, "step": 8670 }, { "epoch": 29.9, "grad_norm": 0.6269335746765137, "learning_rate": 2.9348505747126437e-05, "loss": 0.22, "step": 8671 }, { "epoch": 29.90344827586207, "grad_norm": 0.5936538577079773, "learning_rate": 2.9348045977011495e-05, "loss": 0.2206, "step": 8672 }, { "epoch": 29.906896551724138, "grad_norm": 0.8234629034996033, "learning_rate": 2.9347586206896554e-05, "loss": 0.2213, "step": 8673 }, { "epoch": 29.910344827586208, "grad_norm": 0.7905863523483276, "learning_rate": 2.934712643678161e-05, "loss": 0.213, "step": 8674 }, { "epoch": 29.913793103448278, "grad_norm": 0.7021034955978394, "learning_rate": 2.9346666666666668e-05, "loss": 0.2553, "step": 8675 }, { "epoch": 29.917241379310344, "grad_norm": 0.6896199584007263, "learning_rate": 2.9346206896551723e-05, "loss": 0.2105, "step": 8676 }, { "epoch": 29.920689655172414, "grad_norm": 0.5245978832244873, "learning_rate": 2.9345747126436785e-05, "loss": 0.1957, "step": 8677 }, { "epoch": 29.924137931034483, "grad_norm": 0.8294116854667664, "learning_rate": 2.934528735632184e-05, "loss": 0.208, "step": 8678 }, { "epoch": 29.927586206896553, "grad_norm": 1.005845546722412, "learning_rate": 2.9344827586206896e-05, "loss": 0.1846, "step": 8679 }, { "epoch": 29.93103448275862, "grad_norm": 1.599161982536316, "learning_rate": 2.9344367816091955e-05, "loss": 0.1763, "step": 8680 }, { "epoch": 29.93448275862069, "grad_norm": 0.7113524079322815, "learning_rate": 2.9343908045977013e-05, "loss": 0.1865, "step": 8681 }, { "epoch": 29.93793103448276, "grad_norm": 1.3711423873901367, "learning_rate": 2.9343448275862072e-05, "loss": 0.1909, "step": 8682 }, { "epoch": 29.94137931034483, "grad_norm": 0.922446072101593, "learning_rate": 2.9342988505747127e-05, "loss": 0.2039, "step": 8683 }, { "epoch": 29.944827586206898, "grad_norm": 2.4140586853027344, "learning_rate": 2.9342528735632183e-05, "loss": 0.1989, "step": 8684 }, { "epoch": 29.948275862068964, "grad_norm": 1.4609438180923462, "learning_rate": 2.9342068965517245e-05, "loss": 0.291, "step": 8685 }, { "epoch": 29.951724137931034, "grad_norm": 1.1451165676116943, "learning_rate": 2.93416091954023e-05, "loss": 0.2804, "step": 8686 }, { "epoch": 29.955172413793104, "grad_norm": 1.3929418325424194, "learning_rate": 2.9341149425287355e-05, "loss": 0.2377, "step": 8687 }, { "epoch": 29.958620689655174, "grad_norm": 0.5720802545547485, "learning_rate": 2.9340689655172414e-05, "loss": 0.2186, "step": 8688 }, { "epoch": 29.96206896551724, "grad_norm": 0.6452265977859497, "learning_rate": 2.9340229885057473e-05, "loss": 0.2281, "step": 8689 }, { "epoch": 29.96551724137931, "grad_norm": 1.1234354972839355, "learning_rate": 2.933977011494253e-05, "loss": 0.2502, "step": 8690 }, { "epoch": 29.96896551724138, "grad_norm": 0.7088279128074646, "learning_rate": 2.9339310344827587e-05, "loss": 0.2131, "step": 8691 }, { "epoch": 29.97241379310345, "grad_norm": 0.8202393054962158, "learning_rate": 2.9338850574712642e-05, "loss": 0.2086, "step": 8692 }, { "epoch": 29.97586206896552, "grad_norm": 0.5350325107574463, "learning_rate": 2.9338390804597704e-05, "loss": 0.2135, "step": 8693 }, { "epoch": 29.979310344827585, "grad_norm": 0.8553653955459595, "learning_rate": 2.933793103448276e-05, "loss": 0.2349, "step": 8694 }, { "epoch": 29.982758620689655, "grad_norm": 0.6570316553115845, "learning_rate": 2.9337471264367818e-05, "loss": 0.1901, "step": 8695 }, { "epoch": 29.986206896551725, "grad_norm": 1.5945117473602295, "learning_rate": 2.9337011494252873e-05, "loss": 0.2076, "step": 8696 }, { "epoch": 29.989655172413794, "grad_norm": 0.8211422562599182, "learning_rate": 2.9336551724137932e-05, "loss": 0.2099, "step": 8697 }, { "epoch": 29.99310344827586, "grad_norm": 0.81424480676651, "learning_rate": 2.933609195402299e-05, "loss": 0.1973, "step": 8698 }, { "epoch": 29.99655172413793, "grad_norm": 0.8380650281906128, "learning_rate": 2.9335632183908046e-05, "loss": 0.1964, "step": 8699 }, { "epoch": 30.0, "grad_norm": 1.0383663177490234, "learning_rate": 2.9335172413793105e-05, "loss": 0.2603, "step": 8700 }, { "epoch": 30.00344827586207, "grad_norm": 0.7438857555389404, "learning_rate": 2.9334712643678163e-05, "loss": 0.2908, "step": 8701 }, { "epoch": 30.00689655172414, "grad_norm": 0.7142217755317688, "learning_rate": 2.933425287356322e-05, "loss": 0.2494, "step": 8702 }, { "epoch": 30.010344827586206, "grad_norm": 1.1346741914749146, "learning_rate": 2.9333793103448277e-05, "loss": 0.2282, "step": 8703 }, { "epoch": 30.013793103448275, "grad_norm": 0.571052074432373, "learning_rate": 2.9333333333333333e-05, "loss": 0.2097, "step": 8704 }, { "epoch": 30.017241379310345, "grad_norm": 0.6793328523635864, "learning_rate": 2.9332873563218395e-05, "loss": 0.2092, "step": 8705 }, { "epoch": 30.020689655172415, "grad_norm": 0.5436508059501648, "learning_rate": 2.933241379310345e-05, "loss": 0.1907, "step": 8706 }, { "epoch": 30.02413793103448, "grad_norm": 1.2438628673553467, "learning_rate": 2.9331954022988505e-05, "loss": 0.2107, "step": 8707 }, { "epoch": 30.02758620689655, "grad_norm": 0.98056560754776, "learning_rate": 2.9331494252873564e-05, "loss": 0.223, "step": 8708 }, { "epoch": 30.03103448275862, "grad_norm": 0.5863348245620728, "learning_rate": 2.9331034482758623e-05, "loss": 0.2164, "step": 8709 }, { "epoch": 30.03448275862069, "grad_norm": 0.5364799499511719, "learning_rate": 2.933057471264368e-05, "loss": 0.208, "step": 8710 }, { "epoch": 30.03793103448276, "grad_norm": 0.8692753314971924, "learning_rate": 2.9330114942528737e-05, "loss": 0.2048, "step": 8711 }, { "epoch": 30.041379310344826, "grad_norm": 0.6529951691627502, "learning_rate": 2.9329655172413792e-05, "loss": 0.2215, "step": 8712 }, { "epoch": 30.044827586206896, "grad_norm": 0.6545840501785278, "learning_rate": 2.9329195402298854e-05, "loss": 0.2058, "step": 8713 }, { "epoch": 30.048275862068966, "grad_norm": 1.437315583229065, "learning_rate": 2.932873563218391e-05, "loss": 0.2015, "step": 8714 }, { "epoch": 30.051724137931036, "grad_norm": 0.803970217704773, "learning_rate": 2.9328275862068965e-05, "loss": 0.2099, "step": 8715 }, { "epoch": 30.055172413793102, "grad_norm": 0.984642744064331, "learning_rate": 2.9327816091954023e-05, "loss": 0.1984, "step": 8716 }, { "epoch": 30.05862068965517, "grad_norm": 0.7948376536369324, "learning_rate": 2.9327356321839082e-05, "loss": 0.1951, "step": 8717 }, { "epoch": 30.06206896551724, "grad_norm": 1.0787266492843628, "learning_rate": 2.932689655172414e-05, "loss": 0.1961, "step": 8718 }, { "epoch": 30.06551724137931, "grad_norm": 0.6122605204582214, "learning_rate": 2.9326436781609196e-05, "loss": 0.2074, "step": 8719 }, { "epoch": 30.06896551724138, "grad_norm": 1.0589922666549683, "learning_rate": 2.932597701149425e-05, "loss": 0.2022, "step": 8720 }, { "epoch": 30.072413793103447, "grad_norm": 0.9752377271652222, "learning_rate": 2.9325517241379313e-05, "loss": 0.1804, "step": 8721 }, { "epoch": 30.075862068965517, "grad_norm": 1.3555039167404175, "learning_rate": 2.932505747126437e-05, "loss": 0.1641, "step": 8722 }, { "epoch": 30.079310344827586, "grad_norm": 1.0553662776947021, "learning_rate": 2.9324597701149427e-05, "loss": 0.1885, "step": 8723 }, { "epoch": 30.082758620689656, "grad_norm": 6.136494159698486, "learning_rate": 2.9324137931034483e-05, "loss": 0.2184, "step": 8724 }, { "epoch": 30.086206896551722, "grad_norm": 1.5088729858398438, "learning_rate": 2.932367816091954e-05, "loss": 0.2775, "step": 8725 }, { "epoch": 30.089655172413792, "grad_norm": 0.686705470085144, "learning_rate": 2.93232183908046e-05, "loss": 0.2634, "step": 8726 }, { "epoch": 30.093103448275862, "grad_norm": 0.6603859066963196, "learning_rate": 2.9322758620689655e-05, "loss": 0.2629, "step": 8727 }, { "epoch": 30.09655172413793, "grad_norm": 1.7419995069503784, "learning_rate": 2.9322298850574714e-05, "loss": 0.236, "step": 8728 }, { "epoch": 30.1, "grad_norm": 0.9669407606124878, "learning_rate": 2.9321839080459773e-05, "loss": 0.2362, "step": 8729 }, { "epoch": 30.103448275862068, "grad_norm": 0.5677290558815002, "learning_rate": 2.9321379310344828e-05, "loss": 0.2221, "step": 8730 }, { "epoch": 30.106896551724137, "grad_norm": 0.9608299732208252, "learning_rate": 2.9320919540229887e-05, "loss": 0.2372, "step": 8731 }, { "epoch": 30.110344827586207, "grad_norm": 0.6838240027427673, "learning_rate": 2.9320459770114942e-05, "loss": 0.2252, "step": 8732 }, { "epoch": 30.113793103448277, "grad_norm": 0.5670236349105835, "learning_rate": 2.9320000000000004e-05, "loss": 0.2035, "step": 8733 }, { "epoch": 30.117241379310343, "grad_norm": 0.7104074358940125, "learning_rate": 2.931954022988506e-05, "loss": 0.1983, "step": 8734 }, { "epoch": 30.120689655172413, "grad_norm": 0.8269029855728149, "learning_rate": 2.9319080459770114e-05, "loss": 0.1985, "step": 8735 }, { "epoch": 30.124137931034483, "grad_norm": 0.4798024892807007, "learning_rate": 2.9318620689655173e-05, "loss": 0.1828, "step": 8736 }, { "epoch": 30.127586206896552, "grad_norm": 0.8468704223632812, "learning_rate": 2.9318160919540232e-05, "loss": 0.2326, "step": 8737 }, { "epoch": 30.131034482758622, "grad_norm": 3.002102851867676, "learning_rate": 2.931770114942529e-05, "loss": 0.2108, "step": 8738 }, { "epoch": 30.13448275862069, "grad_norm": 0.7331115007400513, "learning_rate": 2.9317241379310346e-05, "loss": 0.2044, "step": 8739 }, { "epoch": 30.137931034482758, "grad_norm": 0.6651249527931213, "learning_rate": 2.93167816091954e-05, "loss": 0.2049, "step": 8740 }, { "epoch": 30.141379310344828, "grad_norm": 1.245772123336792, "learning_rate": 2.9316321839080463e-05, "loss": 0.1809, "step": 8741 }, { "epoch": 30.144827586206898, "grad_norm": 0.7006087303161621, "learning_rate": 2.931586206896552e-05, "loss": 0.2278, "step": 8742 }, { "epoch": 30.148275862068967, "grad_norm": 0.8433659076690674, "learning_rate": 2.9315402298850574e-05, "loss": 0.1941, "step": 8743 }, { "epoch": 30.151724137931033, "grad_norm": 1.0693089962005615, "learning_rate": 2.9314942528735632e-05, "loss": 0.2045, "step": 8744 }, { "epoch": 30.155172413793103, "grad_norm": 0.7195056676864624, "learning_rate": 2.931448275862069e-05, "loss": 0.1806, "step": 8745 }, { "epoch": 30.158620689655173, "grad_norm": 1.9944993257522583, "learning_rate": 2.931402298850575e-05, "loss": 0.2076, "step": 8746 }, { "epoch": 30.162068965517243, "grad_norm": 0.7385398745536804, "learning_rate": 2.9313563218390805e-05, "loss": 0.1792, "step": 8747 }, { "epoch": 30.16551724137931, "grad_norm": 0.9807063341140747, "learning_rate": 2.931310344827586e-05, "loss": 0.2075, "step": 8748 }, { "epoch": 30.16896551724138, "grad_norm": 2.3446359634399414, "learning_rate": 2.9312643678160922e-05, "loss": 0.2134, "step": 8749 }, { "epoch": 30.17241379310345, "grad_norm": 1.9261524677276611, "learning_rate": 2.9312183908045978e-05, "loss": 0.2578, "step": 8750 }, { "epoch": 30.175862068965518, "grad_norm": 0.49998220801353455, "learning_rate": 2.9311724137931036e-05, "loss": 0.2861, "step": 8751 }, { "epoch": 30.179310344827588, "grad_norm": 0.6750854253768921, "learning_rate": 2.9311264367816092e-05, "loss": 0.2483, "step": 8752 }, { "epoch": 30.182758620689654, "grad_norm": 0.6338655352592468, "learning_rate": 2.9310804597701147e-05, "loss": 0.198, "step": 8753 }, { "epoch": 30.186206896551724, "grad_norm": 0.6344195008277893, "learning_rate": 2.931034482758621e-05, "loss": 0.2169, "step": 8754 }, { "epoch": 30.189655172413794, "grad_norm": 0.6664438843727112, "learning_rate": 2.9309885057471264e-05, "loss": 0.2222, "step": 8755 }, { "epoch": 30.193103448275863, "grad_norm": 0.8995263576507568, "learning_rate": 2.9309425287356323e-05, "loss": 0.2155, "step": 8756 }, { "epoch": 30.19655172413793, "grad_norm": 0.8597977161407471, "learning_rate": 2.930896551724138e-05, "loss": 0.2508, "step": 8757 }, { "epoch": 30.2, "grad_norm": 0.7814299464225769, "learning_rate": 2.9308505747126437e-05, "loss": 0.216, "step": 8758 }, { "epoch": 30.20344827586207, "grad_norm": 0.9065577983856201, "learning_rate": 2.9308045977011496e-05, "loss": 0.2101, "step": 8759 }, { "epoch": 30.20689655172414, "grad_norm": 0.669593870639801, "learning_rate": 2.930758620689655e-05, "loss": 0.2087, "step": 8760 }, { "epoch": 30.21034482758621, "grad_norm": 0.4804076552391052, "learning_rate": 2.930712643678161e-05, "loss": 0.2189, "step": 8761 }, { "epoch": 30.213793103448275, "grad_norm": 0.6305126547813416, "learning_rate": 2.930666666666667e-05, "loss": 0.2195, "step": 8762 }, { "epoch": 30.217241379310344, "grad_norm": 0.671833336353302, "learning_rate": 2.9306206896551724e-05, "loss": 0.1882, "step": 8763 }, { "epoch": 30.220689655172414, "grad_norm": 0.5995152592658997, "learning_rate": 2.9305747126436782e-05, "loss": 0.2096, "step": 8764 }, { "epoch": 30.224137931034484, "grad_norm": 0.6381880640983582, "learning_rate": 2.9305287356321838e-05, "loss": 0.2051, "step": 8765 }, { "epoch": 30.22758620689655, "grad_norm": 0.9578496217727661, "learning_rate": 2.93048275862069e-05, "loss": 0.19, "step": 8766 }, { "epoch": 30.23103448275862, "grad_norm": 0.7487747073173523, "learning_rate": 2.9304367816091955e-05, "loss": 0.1775, "step": 8767 }, { "epoch": 30.23448275862069, "grad_norm": 0.7152164578437805, "learning_rate": 2.930390804597701e-05, "loss": 0.1932, "step": 8768 }, { "epoch": 30.23793103448276, "grad_norm": 0.909809410572052, "learning_rate": 2.930344827586207e-05, "loss": 0.231, "step": 8769 }, { "epoch": 30.24137931034483, "grad_norm": 0.8571061491966248, "learning_rate": 2.9302988505747128e-05, "loss": 0.2049, "step": 8770 }, { "epoch": 30.244827586206895, "grad_norm": 0.6587442755699158, "learning_rate": 2.9302528735632186e-05, "loss": 0.1983, "step": 8771 }, { "epoch": 30.248275862068965, "grad_norm": 0.8823628425598145, "learning_rate": 2.9302068965517242e-05, "loss": 0.1859, "step": 8772 }, { "epoch": 30.251724137931035, "grad_norm": 0.9013725519180298, "learning_rate": 2.9301609195402297e-05, "loss": 0.191, "step": 8773 }, { "epoch": 30.255172413793105, "grad_norm": 1.1221928596496582, "learning_rate": 2.930114942528736e-05, "loss": 0.1805, "step": 8774 }, { "epoch": 30.25862068965517, "grad_norm": 1.5241210460662842, "learning_rate": 2.9300689655172414e-05, "loss": 0.2454, "step": 8775 }, { "epoch": 30.26206896551724, "grad_norm": 0.9623567461967468, "learning_rate": 2.930022988505747e-05, "loss": 0.2596, "step": 8776 }, { "epoch": 30.26551724137931, "grad_norm": 0.7084299325942993, "learning_rate": 2.929977011494253e-05, "loss": 0.2605, "step": 8777 }, { "epoch": 30.26896551724138, "grad_norm": 0.5393843650817871, "learning_rate": 2.9299310344827587e-05, "loss": 0.2272, "step": 8778 }, { "epoch": 30.27241379310345, "grad_norm": 0.5785841345787048, "learning_rate": 2.9298850574712646e-05, "loss": 0.2295, "step": 8779 }, { "epoch": 30.275862068965516, "grad_norm": 0.7508633732795715, "learning_rate": 2.92983908045977e-05, "loss": 0.2135, "step": 8780 }, { "epoch": 30.279310344827586, "grad_norm": 1.3206374645233154, "learning_rate": 2.9297931034482756e-05, "loss": 0.2246, "step": 8781 }, { "epoch": 30.282758620689656, "grad_norm": 0.9079409241676331, "learning_rate": 2.929747126436782e-05, "loss": 0.2169, "step": 8782 }, { "epoch": 30.286206896551725, "grad_norm": 0.9144492149353027, "learning_rate": 2.9297011494252874e-05, "loss": 0.1883, "step": 8783 }, { "epoch": 30.28965517241379, "grad_norm": 0.8418628573417664, "learning_rate": 2.9296551724137932e-05, "loss": 0.2077, "step": 8784 }, { "epoch": 30.29310344827586, "grad_norm": 1.5497639179229736, "learning_rate": 2.9296091954022988e-05, "loss": 0.204, "step": 8785 }, { "epoch": 30.29655172413793, "grad_norm": 0.7962421178817749, "learning_rate": 2.9295632183908046e-05, "loss": 0.2102, "step": 8786 }, { "epoch": 30.3, "grad_norm": 1.1703613996505737, "learning_rate": 2.9295172413793105e-05, "loss": 0.1989, "step": 8787 }, { "epoch": 30.30344827586207, "grad_norm": 0.6774271726608276, "learning_rate": 2.929471264367816e-05, "loss": 0.2157, "step": 8788 }, { "epoch": 30.306896551724137, "grad_norm": 1.4924513101577759, "learning_rate": 2.929425287356322e-05, "loss": 0.196, "step": 8789 }, { "epoch": 30.310344827586206, "grad_norm": 1.1562117338180542, "learning_rate": 2.9293793103448278e-05, "loss": 0.2036, "step": 8790 }, { "epoch": 30.313793103448276, "grad_norm": 0.7279473543167114, "learning_rate": 2.9293333333333333e-05, "loss": 0.2239, "step": 8791 }, { "epoch": 30.317241379310346, "grad_norm": 1.0067263841629028, "learning_rate": 2.929287356321839e-05, "loss": 0.1693, "step": 8792 }, { "epoch": 30.320689655172412, "grad_norm": 0.8508703112602234, "learning_rate": 2.9292413793103447e-05, "loss": 0.2001, "step": 8793 }, { "epoch": 30.324137931034482, "grad_norm": 3.3256843090057373, "learning_rate": 2.929195402298851e-05, "loss": 0.1975, "step": 8794 }, { "epoch": 30.32758620689655, "grad_norm": 0.8122677206993103, "learning_rate": 2.9291494252873564e-05, "loss": 0.1942, "step": 8795 }, { "epoch": 30.33103448275862, "grad_norm": 0.797671377658844, "learning_rate": 2.929103448275862e-05, "loss": 0.2144, "step": 8796 }, { "epoch": 30.33448275862069, "grad_norm": 0.9274055361747742, "learning_rate": 2.9290574712643678e-05, "loss": 0.1741, "step": 8797 }, { "epoch": 30.337931034482757, "grad_norm": 0.9793065190315247, "learning_rate": 2.9290114942528737e-05, "loss": 0.183, "step": 8798 }, { "epoch": 30.341379310344827, "grad_norm": 0.9779834151268005, "learning_rate": 2.9289655172413796e-05, "loss": 0.247, "step": 8799 }, { "epoch": 30.344827586206897, "grad_norm": 1.2292778491973877, "learning_rate": 2.928919540229885e-05, "loss": 0.2726, "step": 8800 }, { "epoch": 30.348275862068967, "grad_norm": 0.6643134355545044, "learning_rate": 2.9288735632183906e-05, "loss": 0.3136, "step": 8801 }, { "epoch": 30.351724137931033, "grad_norm": 8.596054077148438, "learning_rate": 2.9288275862068968e-05, "loss": 0.2297, "step": 8802 }, { "epoch": 30.355172413793102, "grad_norm": 0.7979637980461121, "learning_rate": 2.9287816091954024e-05, "loss": 0.2388, "step": 8803 }, { "epoch": 30.358620689655172, "grad_norm": 1.2403696775436401, "learning_rate": 2.928735632183908e-05, "loss": 0.2398, "step": 8804 }, { "epoch": 30.362068965517242, "grad_norm": 0.8463532328605652, "learning_rate": 2.9286896551724138e-05, "loss": 0.24, "step": 8805 }, { "epoch": 30.36551724137931, "grad_norm": 0.7178825736045837, "learning_rate": 2.9286436781609196e-05, "loss": 0.2085, "step": 8806 }, { "epoch": 30.368965517241378, "grad_norm": 1.6815282106399536, "learning_rate": 2.9285977011494255e-05, "loss": 0.2088, "step": 8807 }, { "epoch": 30.372413793103448, "grad_norm": 1.4243227243423462, "learning_rate": 2.928551724137931e-05, "loss": 0.2221, "step": 8808 }, { "epoch": 30.375862068965517, "grad_norm": 0.6769554018974304, "learning_rate": 2.9285057471264366e-05, "loss": 0.1971, "step": 8809 }, { "epoch": 30.379310344827587, "grad_norm": 1.2463581562042236, "learning_rate": 2.9284597701149428e-05, "loss": 0.2105, "step": 8810 }, { "epoch": 30.382758620689657, "grad_norm": 1.0432194471359253, "learning_rate": 2.9284137931034483e-05, "loss": 0.2013, "step": 8811 }, { "epoch": 30.386206896551723, "grad_norm": 1.4291932582855225, "learning_rate": 2.928367816091954e-05, "loss": 0.2368, "step": 8812 }, { "epoch": 30.389655172413793, "grad_norm": 1.1040492057800293, "learning_rate": 2.9283218390804597e-05, "loss": 0.2069, "step": 8813 }, { "epoch": 30.393103448275863, "grad_norm": 1.0288565158843994, "learning_rate": 2.9282758620689656e-05, "loss": 0.2033, "step": 8814 }, { "epoch": 30.396551724137932, "grad_norm": 0.8358705639839172, "learning_rate": 2.9282298850574714e-05, "loss": 0.2155, "step": 8815 }, { "epoch": 30.4, "grad_norm": 0.7003178000450134, "learning_rate": 2.928183908045977e-05, "loss": 0.2149, "step": 8816 }, { "epoch": 30.40344827586207, "grad_norm": 1.298601746559143, "learning_rate": 2.9281379310344828e-05, "loss": 0.197, "step": 8817 }, { "epoch": 30.406896551724138, "grad_norm": 1.8652749061584473, "learning_rate": 2.9280919540229887e-05, "loss": 0.219, "step": 8818 }, { "epoch": 30.410344827586208, "grad_norm": 0.9948567748069763, "learning_rate": 2.9280459770114942e-05, "loss": 0.1825, "step": 8819 }, { "epoch": 30.413793103448278, "grad_norm": 0.6669936776161194, "learning_rate": 2.928e-05, "loss": 0.1993, "step": 8820 }, { "epoch": 30.417241379310344, "grad_norm": 1.5109230279922485, "learning_rate": 2.9279540229885056e-05, "loss": 0.2061, "step": 8821 }, { "epoch": 30.420689655172414, "grad_norm": 4.5698347091674805, "learning_rate": 2.9279080459770118e-05, "loss": 0.1933, "step": 8822 }, { "epoch": 30.424137931034483, "grad_norm": 1.5804989337921143, "learning_rate": 2.9278620689655174e-05, "loss": 0.1766, "step": 8823 }, { "epoch": 30.427586206896553, "grad_norm": 1.1259210109710693, "learning_rate": 2.927816091954023e-05, "loss": 0.2131, "step": 8824 }, { "epoch": 30.43103448275862, "grad_norm": 1.881569266319275, "learning_rate": 2.9277701149425288e-05, "loss": 0.2547, "step": 8825 }, { "epoch": 30.43448275862069, "grad_norm": 0.9778810739517212, "learning_rate": 2.9277241379310346e-05, "loss": 0.2593, "step": 8826 }, { "epoch": 30.43793103448276, "grad_norm": 0.8395897746086121, "learning_rate": 2.9276781609195405e-05, "loss": 0.2423, "step": 8827 }, { "epoch": 30.44137931034483, "grad_norm": 0.817679762840271, "learning_rate": 2.927632183908046e-05, "loss": 0.2469, "step": 8828 }, { "epoch": 30.444827586206898, "grad_norm": 0.6887062788009644, "learning_rate": 2.9275862068965515e-05, "loss": 0.255, "step": 8829 }, { "epoch": 30.448275862068964, "grad_norm": 1.2595515251159668, "learning_rate": 2.9275402298850578e-05, "loss": 0.2252, "step": 8830 }, { "epoch": 30.451724137931034, "grad_norm": 1.1798195838928223, "learning_rate": 2.9274942528735633e-05, "loss": 0.2497, "step": 8831 }, { "epoch": 30.455172413793104, "grad_norm": 0.6225171089172363, "learning_rate": 2.9274482758620688e-05, "loss": 0.2218, "step": 8832 }, { "epoch": 30.458620689655174, "grad_norm": 1.1131373643875122, "learning_rate": 2.9274022988505747e-05, "loss": 0.2581, "step": 8833 }, { "epoch": 30.46206896551724, "grad_norm": 0.8372476696968079, "learning_rate": 2.9273563218390805e-05, "loss": 0.2173, "step": 8834 }, { "epoch": 30.46551724137931, "grad_norm": 0.6735199093818665, "learning_rate": 2.9273103448275864e-05, "loss": 0.2078, "step": 8835 }, { "epoch": 30.46896551724138, "grad_norm": 0.9659063220024109, "learning_rate": 2.927264367816092e-05, "loss": 0.2687, "step": 8836 }, { "epoch": 30.47241379310345, "grad_norm": 0.848442792892456, "learning_rate": 2.9272183908045975e-05, "loss": 0.2075, "step": 8837 }, { "epoch": 30.47586206896552, "grad_norm": 0.9841694831848145, "learning_rate": 2.9271724137931037e-05, "loss": 0.2028, "step": 8838 }, { "epoch": 30.479310344827585, "grad_norm": 1.0937641859054565, "learning_rate": 2.9271264367816092e-05, "loss": 0.1966, "step": 8839 }, { "epoch": 30.482758620689655, "grad_norm": 1.0196211338043213, "learning_rate": 2.927080459770115e-05, "loss": 0.2458, "step": 8840 }, { "epoch": 30.486206896551725, "grad_norm": 0.778147280216217, "learning_rate": 2.9270344827586206e-05, "loss": 0.1971, "step": 8841 }, { "epoch": 30.489655172413794, "grad_norm": 0.7785834670066833, "learning_rate": 2.9269885057471265e-05, "loss": 0.1892, "step": 8842 }, { "epoch": 30.49310344827586, "grad_norm": 1.7003872394561768, "learning_rate": 2.9269425287356323e-05, "loss": 0.1894, "step": 8843 }, { "epoch": 30.49655172413793, "grad_norm": 0.5954653024673462, "learning_rate": 2.926896551724138e-05, "loss": 0.1755, "step": 8844 }, { "epoch": 30.5, "grad_norm": 0.7626427412033081, "learning_rate": 2.9268505747126437e-05, "loss": 0.1965, "step": 8845 }, { "epoch": 30.50344827586207, "grad_norm": 1.1107748746871948, "learning_rate": 2.9268045977011496e-05, "loss": 0.2289, "step": 8846 }, { "epoch": 30.50689655172414, "grad_norm": 0.617690920829773, "learning_rate": 2.926758620689655e-05, "loss": 0.1889, "step": 8847 }, { "epoch": 30.510344827586206, "grad_norm": 0.8923642635345459, "learning_rate": 2.926712643678161e-05, "loss": 0.1891, "step": 8848 }, { "epoch": 30.513793103448275, "grad_norm": 1.9072916507720947, "learning_rate": 2.9266666666666665e-05, "loss": 0.2356, "step": 8849 }, { "epoch": 30.517241379310345, "grad_norm": 2.7550246715545654, "learning_rate": 2.9266206896551727e-05, "loss": 0.2455, "step": 8850 }, { "epoch": 30.520689655172415, "grad_norm": 0.705619215965271, "learning_rate": 2.9265747126436783e-05, "loss": 0.3131, "step": 8851 }, { "epoch": 30.52413793103448, "grad_norm": 0.9372568726539612, "learning_rate": 2.9265287356321838e-05, "loss": 0.2412, "step": 8852 }, { "epoch": 30.52758620689655, "grad_norm": 0.9465014934539795, "learning_rate": 2.9264827586206897e-05, "loss": 0.283, "step": 8853 }, { "epoch": 30.53103448275862, "grad_norm": 0.5749289989471436, "learning_rate": 2.9264367816091955e-05, "loss": 0.2503, "step": 8854 }, { "epoch": 30.53448275862069, "grad_norm": 0.48840561509132385, "learning_rate": 2.9263908045977014e-05, "loss": 0.2322, "step": 8855 }, { "epoch": 30.53793103448276, "grad_norm": 0.6599872708320618, "learning_rate": 2.926344827586207e-05, "loss": 0.2211, "step": 8856 }, { "epoch": 30.541379310344826, "grad_norm": 0.5089055895805359, "learning_rate": 2.9262988505747125e-05, "loss": 0.2416, "step": 8857 }, { "epoch": 30.544827586206896, "grad_norm": 1.192472219467163, "learning_rate": 2.9262528735632187e-05, "loss": 0.2007, "step": 8858 }, { "epoch": 30.548275862068966, "grad_norm": 0.9454430937767029, "learning_rate": 2.9262068965517242e-05, "loss": 0.2206, "step": 8859 }, { "epoch": 30.551724137931036, "grad_norm": 0.6860490441322327, "learning_rate": 2.9261609195402297e-05, "loss": 0.2266, "step": 8860 }, { "epoch": 30.555172413793102, "grad_norm": 0.7330390810966492, "learning_rate": 2.9261149425287356e-05, "loss": 0.21, "step": 8861 }, { "epoch": 30.55862068965517, "grad_norm": 1.0097486972808838, "learning_rate": 2.9260689655172415e-05, "loss": 0.205, "step": 8862 }, { "epoch": 30.56206896551724, "grad_norm": 0.8003458976745605, "learning_rate": 2.9260229885057473e-05, "loss": 0.2013, "step": 8863 }, { "epoch": 30.56551724137931, "grad_norm": 0.6315908432006836, "learning_rate": 2.925977011494253e-05, "loss": 0.2032, "step": 8864 }, { "epoch": 30.56896551724138, "grad_norm": 0.715430736541748, "learning_rate": 2.9259310344827584e-05, "loss": 0.2095, "step": 8865 }, { "epoch": 30.572413793103447, "grad_norm": 0.8783465623855591, "learning_rate": 2.9258850574712646e-05, "loss": 0.2036, "step": 8866 }, { "epoch": 30.575862068965517, "grad_norm": 0.6984962224960327, "learning_rate": 2.92583908045977e-05, "loss": 0.2051, "step": 8867 }, { "epoch": 30.579310344827586, "grad_norm": 2.362419843673706, "learning_rate": 2.925793103448276e-05, "loss": 0.183, "step": 8868 }, { "epoch": 30.582758620689656, "grad_norm": 2.0469553470611572, "learning_rate": 2.9257471264367815e-05, "loss": 0.1931, "step": 8869 }, { "epoch": 30.586206896551722, "grad_norm": 0.7234535813331604, "learning_rate": 2.9257011494252874e-05, "loss": 0.1688, "step": 8870 }, { "epoch": 30.589655172413792, "grad_norm": 1.10584557056427, "learning_rate": 2.9256551724137933e-05, "loss": 0.2041, "step": 8871 }, { "epoch": 30.593103448275862, "grad_norm": 0.7447190880775452, "learning_rate": 2.9256091954022988e-05, "loss": 0.2077, "step": 8872 }, { "epoch": 30.59655172413793, "grad_norm": 1.2391183376312256, "learning_rate": 2.9255632183908047e-05, "loss": 0.1984, "step": 8873 }, { "epoch": 30.6, "grad_norm": NaN, "learning_rate": 2.9255632183908047e-05, "loss": 0.2098, "step": 8874 }, { "epoch": 30.603448275862068, "grad_norm": 2.2732439041137695, "learning_rate": 2.9255172413793105e-05, "loss": 0.2699, "step": 8875 }, { "epoch": 30.606896551724137, "grad_norm": 1.2167718410491943, "learning_rate": 2.925471264367816e-05, "loss": 0.2591, "step": 8876 }, { "epoch": 30.610344827586207, "grad_norm": 0.5460090041160583, "learning_rate": 2.925425287356322e-05, "loss": 0.2136, "step": 8877 }, { "epoch": 30.613793103448277, "grad_norm": 0.7401909232139587, "learning_rate": 2.9253793103448275e-05, "loss": 0.246, "step": 8878 }, { "epoch": 30.617241379310343, "grad_norm": 0.5577713847160339, "learning_rate": 2.9253333333333337e-05, "loss": 0.215, "step": 8879 }, { "epoch": 30.620689655172413, "grad_norm": 0.5677661895751953, "learning_rate": 2.9252873563218392e-05, "loss": 0.2343, "step": 8880 }, { "epoch": 30.624137931034483, "grad_norm": 1.019330382347107, "learning_rate": 2.9252413793103447e-05, "loss": 0.2232, "step": 8881 }, { "epoch": 30.627586206896552, "grad_norm": 1.035423755645752, "learning_rate": 2.9251954022988506e-05, "loss": 0.2229, "step": 8882 }, { "epoch": 30.631034482758622, "grad_norm": 0.5236851572990417, "learning_rate": 2.9251494252873565e-05, "loss": 0.2205, "step": 8883 }, { "epoch": 30.63448275862069, "grad_norm": 0.5944358110427856, "learning_rate": 2.9251034482758623e-05, "loss": 0.2055, "step": 8884 }, { "epoch": 30.637931034482758, "grad_norm": 0.48257744312286377, "learning_rate": 2.925057471264368e-05, "loss": 0.2122, "step": 8885 }, { "epoch": 30.641379310344828, "grad_norm": 0.769158661365509, "learning_rate": 2.9250114942528734e-05, "loss": 0.1904, "step": 8886 }, { "epoch": 30.644827586206898, "grad_norm": 0.5738797783851624, "learning_rate": 2.9249655172413796e-05, "loss": 0.1862, "step": 8887 }, { "epoch": 30.648275862068964, "grad_norm": 1.1423062086105347, "learning_rate": 2.924919540229885e-05, "loss": 0.1893, "step": 8888 }, { "epoch": 30.651724137931033, "grad_norm": 0.6110487580299377, "learning_rate": 2.924873563218391e-05, "loss": 0.208, "step": 8889 }, { "epoch": 30.655172413793103, "grad_norm": 0.5867713689804077, "learning_rate": 2.9248275862068965e-05, "loss": 0.2083, "step": 8890 }, { "epoch": 30.658620689655173, "grad_norm": 1.5976065397262573, "learning_rate": 2.9247816091954024e-05, "loss": 0.2031, "step": 8891 }, { "epoch": 30.662068965517243, "grad_norm": 0.908539354801178, "learning_rate": 2.9247356321839083e-05, "loss": 0.2014, "step": 8892 }, { "epoch": 30.66551724137931, "grad_norm": 0.7854989171028137, "learning_rate": 2.9246896551724138e-05, "loss": 0.1944, "step": 8893 }, { "epoch": 30.66896551724138, "grad_norm": 0.6966727375984192, "learning_rate": 2.9246436781609193e-05, "loss": 0.2041, "step": 8894 }, { "epoch": 30.67241379310345, "grad_norm": 0.6624903082847595, "learning_rate": 2.9245977011494255e-05, "loss": 0.1868, "step": 8895 }, { "epoch": 30.675862068965518, "grad_norm": 1.0444097518920898, "learning_rate": 2.924551724137931e-05, "loss": 0.2052, "step": 8896 }, { "epoch": 30.679310344827588, "grad_norm": 0.6188182830810547, "learning_rate": 2.924505747126437e-05, "loss": 0.1677, "step": 8897 }, { "epoch": 30.682758620689654, "grad_norm": 0.7818641662597656, "learning_rate": 2.9244597701149425e-05, "loss": 0.2125, "step": 8898 }, { "epoch": 30.686206896551724, "grad_norm": 0.7828735113143921, "learning_rate": 2.9244137931034483e-05, "loss": 0.177, "step": 8899 }, { "epoch": 30.689655172413794, "grad_norm": 1.528027892112732, "learning_rate": 2.9243678160919542e-05, "loss": 0.3073, "step": 8900 }, { "epoch": 30.693103448275863, "grad_norm": 0.7809054851531982, "learning_rate": 2.9243218390804597e-05, "loss": 0.2535, "step": 8901 }, { "epoch": 30.69655172413793, "grad_norm": 1.1763607263565063, "learning_rate": 2.9242758620689656e-05, "loss": 0.2238, "step": 8902 }, { "epoch": 30.7, "grad_norm": 0.8948944807052612, "learning_rate": 2.9242298850574715e-05, "loss": 0.2376, "step": 8903 }, { "epoch": 30.70344827586207, "grad_norm": 3.5251212120056152, "learning_rate": 2.924183908045977e-05, "loss": 0.2608, "step": 8904 }, { "epoch": 30.70689655172414, "grad_norm": 0.7112190127372742, "learning_rate": 2.924137931034483e-05, "loss": 0.2347, "step": 8905 }, { "epoch": 30.71034482758621, "grad_norm": 0.7941039800643921, "learning_rate": 2.9240919540229884e-05, "loss": 0.2048, "step": 8906 }, { "epoch": 30.713793103448275, "grad_norm": 2.004512310028076, "learning_rate": 2.9240459770114946e-05, "loss": 0.2405, "step": 8907 }, { "epoch": 30.717241379310344, "grad_norm": 0.6425161361694336, "learning_rate": 2.924e-05, "loss": 0.2465, "step": 8908 }, { "epoch": 30.720689655172414, "grad_norm": 0.6191850304603577, "learning_rate": 2.9239540229885057e-05, "loss": 0.2086, "step": 8909 }, { "epoch": 30.724137931034484, "grad_norm": 1.0471563339233398, "learning_rate": 2.9239080459770115e-05, "loss": 0.2079, "step": 8910 }, { "epoch": 30.72758620689655, "grad_norm": 0.5595778226852417, "learning_rate": 2.9238620689655174e-05, "loss": 0.2229, "step": 8911 }, { "epoch": 30.73103448275862, "grad_norm": 0.6680110096931458, "learning_rate": 2.9238160919540233e-05, "loss": 0.2193, "step": 8912 }, { "epoch": 30.73448275862069, "grad_norm": 0.5878438949584961, "learning_rate": 2.9237701149425288e-05, "loss": 0.2057, "step": 8913 }, { "epoch": 30.73793103448276, "grad_norm": 1.2033997774124146, "learning_rate": 2.9237241379310343e-05, "loss": 0.1965, "step": 8914 }, { "epoch": 30.74137931034483, "grad_norm": 2.35571551322937, "learning_rate": 2.9236781609195405e-05, "loss": 0.2187, "step": 8915 }, { "epoch": 30.744827586206895, "grad_norm": 0.8252328634262085, "learning_rate": 2.923632183908046e-05, "loss": 0.219, "step": 8916 }, { "epoch": 30.748275862068965, "grad_norm": 0.7656503915786743, "learning_rate": 2.923586206896552e-05, "loss": 0.1795, "step": 8917 }, { "epoch": 30.751724137931035, "grad_norm": 2.4293346405029297, "learning_rate": 2.9235402298850575e-05, "loss": 0.2012, "step": 8918 }, { "epoch": 30.755172413793105, "grad_norm": 7.721456527709961, "learning_rate": 2.9234942528735633e-05, "loss": 0.1954, "step": 8919 }, { "epoch": 30.75862068965517, "grad_norm": 0.9144800901412964, "learning_rate": 2.9234482758620692e-05, "loss": 0.1842, "step": 8920 }, { "epoch": 30.76206896551724, "grad_norm": 0.9056566953659058, "learning_rate": 2.9234022988505747e-05, "loss": 0.1803, "step": 8921 }, { "epoch": 30.76551724137931, "grad_norm": 0.8673612475395203, "learning_rate": 2.9233563218390802e-05, "loss": 0.1892, "step": 8922 }, { "epoch": 30.76896551724138, "grad_norm": 0.8351926803588867, "learning_rate": 2.9233103448275865e-05, "loss": 0.1958, "step": 8923 }, { "epoch": 30.77241379310345, "grad_norm": 2.2159998416900635, "learning_rate": 2.923264367816092e-05, "loss": 0.2419, "step": 8924 }, { "epoch": 30.775862068965516, "grad_norm": 1.328916072845459, "learning_rate": 2.923218390804598e-05, "loss": 0.3102, "step": 8925 }, { "epoch": 30.779310344827586, "grad_norm": 0.7300153970718384, "learning_rate": 2.9231724137931034e-05, "loss": 0.2932, "step": 8926 }, { "epoch": 30.782758620689656, "grad_norm": 0.5648002624511719, "learning_rate": 2.9231264367816093e-05, "loss": 0.2247, "step": 8927 }, { "epoch": 30.786206896551725, "grad_norm": 0.5587320327758789, "learning_rate": 2.923080459770115e-05, "loss": 0.2459, "step": 8928 }, { "epoch": 30.78965517241379, "grad_norm": 1.0229530334472656, "learning_rate": 2.9230344827586206e-05, "loss": 0.2438, "step": 8929 }, { "epoch": 30.79310344827586, "grad_norm": 0.7715327143669128, "learning_rate": 2.9229885057471265e-05, "loss": 0.2428, "step": 8930 }, { "epoch": 30.79655172413793, "grad_norm": 0.5917507410049438, "learning_rate": 2.9229425287356324e-05, "loss": 0.2099, "step": 8931 }, { "epoch": 30.8, "grad_norm": 1.103078007698059, "learning_rate": 2.922896551724138e-05, "loss": 0.2199, "step": 8932 }, { "epoch": 30.80344827586207, "grad_norm": 0.8396797776222229, "learning_rate": 2.9228505747126438e-05, "loss": 0.2148, "step": 8933 }, { "epoch": 30.806896551724137, "grad_norm": 1.6353191137313843, "learning_rate": 2.9228045977011493e-05, "loss": 0.2189, "step": 8934 }, { "epoch": 30.810344827586206, "grad_norm": 1.6655051708221436, "learning_rate": 2.9227586206896555e-05, "loss": 0.2219, "step": 8935 }, { "epoch": 30.813793103448276, "grad_norm": 0.7609307765960693, "learning_rate": 2.922712643678161e-05, "loss": 0.1929, "step": 8936 }, { "epoch": 30.817241379310346, "grad_norm": 1.5278671979904175, "learning_rate": 2.9226666666666666e-05, "loss": 0.2028, "step": 8937 }, { "epoch": 30.820689655172412, "grad_norm": 0.8794884085655212, "learning_rate": 2.9226206896551724e-05, "loss": 0.2167, "step": 8938 }, { "epoch": 30.824137931034482, "grad_norm": 1.021066665649414, "learning_rate": 2.9225747126436783e-05, "loss": 0.2424, "step": 8939 }, { "epoch": 30.82758620689655, "grad_norm": 0.6538940072059631, "learning_rate": 2.9225287356321842e-05, "loss": 0.2162, "step": 8940 }, { "epoch": 30.83103448275862, "grad_norm": 0.578031599521637, "learning_rate": 2.9224827586206897e-05, "loss": 0.1737, "step": 8941 }, { "epoch": 30.83448275862069, "grad_norm": 0.573551595211029, "learning_rate": 2.9224367816091952e-05, "loss": 0.1945, "step": 8942 }, { "epoch": 30.837931034482757, "grad_norm": 0.8408790230751038, "learning_rate": 2.9223908045977015e-05, "loss": 0.2208, "step": 8943 }, { "epoch": 30.841379310344827, "grad_norm": 1.0212230682373047, "learning_rate": 2.922344827586207e-05, "loss": 0.2074, "step": 8944 }, { "epoch": 30.844827586206897, "grad_norm": 1.5705915689468384, "learning_rate": 2.922298850574713e-05, "loss": 0.1861, "step": 8945 }, { "epoch": 30.848275862068967, "grad_norm": 1.051123023033142, "learning_rate": 2.9222528735632184e-05, "loss": 0.1902, "step": 8946 }, { "epoch": 30.851724137931036, "grad_norm": 1.0096279382705688, "learning_rate": 2.9222068965517242e-05, "loss": 0.2062, "step": 8947 }, { "epoch": 30.855172413793102, "grad_norm": 0.984629213809967, "learning_rate": 2.92216091954023e-05, "loss": 0.2121, "step": 8948 }, { "epoch": 30.858620689655172, "grad_norm": 1.0151238441467285, "learning_rate": 2.9221149425287356e-05, "loss": 0.2091, "step": 8949 }, { "epoch": 30.862068965517242, "grad_norm": 0.9507035613059998, "learning_rate": 2.9220689655172412e-05, "loss": 0.2698, "step": 8950 }, { "epoch": 30.86551724137931, "grad_norm": 0.6552464962005615, "learning_rate": 2.9220229885057474e-05, "loss": 0.2623, "step": 8951 }, { "epoch": 30.868965517241378, "grad_norm": 1.65329110622406, "learning_rate": 2.921977011494253e-05, "loss": 0.2495, "step": 8952 }, { "epoch": 30.872413793103448, "grad_norm": 0.5384612083435059, "learning_rate": 2.9219310344827588e-05, "loss": 0.2444, "step": 8953 }, { "epoch": 30.875862068965517, "grad_norm": 0.7929223775863647, "learning_rate": 2.9218850574712643e-05, "loss": 0.2124, "step": 8954 }, { "epoch": 30.879310344827587, "grad_norm": 0.7201306223869324, "learning_rate": 2.9218390804597702e-05, "loss": 0.2263, "step": 8955 }, { "epoch": 30.882758620689657, "grad_norm": 0.7313744425773621, "learning_rate": 2.921793103448276e-05, "loss": 0.2381, "step": 8956 }, { "epoch": 30.886206896551723, "grad_norm": 0.5354700684547424, "learning_rate": 2.9217471264367816e-05, "loss": 0.2122, "step": 8957 }, { "epoch": 30.889655172413793, "grad_norm": 0.6604229807853699, "learning_rate": 2.9217011494252874e-05, "loss": 0.2224, "step": 8958 }, { "epoch": 30.893103448275863, "grad_norm": 0.7542055249214172, "learning_rate": 2.9216551724137933e-05, "loss": 0.224, "step": 8959 }, { "epoch": 30.896551724137932, "grad_norm": 0.8702682256698608, "learning_rate": 2.921609195402299e-05, "loss": 0.2129, "step": 8960 }, { "epoch": 30.9, "grad_norm": 0.48695147037506104, "learning_rate": 2.9215632183908047e-05, "loss": 0.1905, "step": 8961 }, { "epoch": 30.90344827586207, "grad_norm": 0.5717484951019287, "learning_rate": 2.9215172413793102e-05, "loss": 0.2119, "step": 8962 }, { "epoch": 30.906896551724138, "grad_norm": 0.9051303267478943, "learning_rate": 2.9214712643678164e-05, "loss": 0.1896, "step": 8963 }, { "epoch": 30.910344827586208, "grad_norm": 0.948490560054779, "learning_rate": 2.921425287356322e-05, "loss": 0.2119, "step": 8964 }, { "epoch": 30.913793103448278, "grad_norm": 0.8761326670646667, "learning_rate": 2.9213793103448275e-05, "loss": 0.1912, "step": 8965 }, { "epoch": 30.917241379310344, "grad_norm": 1.2740987539291382, "learning_rate": 2.9213333333333334e-05, "loss": 0.1899, "step": 8966 }, { "epoch": 30.920689655172414, "grad_norm": 0.8929611444473267, "learning_rate": 2.9212873563218392e-05, "loss": 0.2096, "step": 8967 }, { "epoch": 30.924137931034483, "grad_norm": 1.5253188610076904, "learning_rate": 2.921241379310345e-05, "loss": 0.1852, "step": 8968 }, { "epoch": 30.927586206896553, "grad_norm": 0.8793511986732483, "learning_rate": 2.9211954022988506e-05, "loss": 0.1931, "step": 8969 }, { "epoch": 30.93103448275862, "grad_norm": 8.783658027648926, "learning_rate": 2.921149425287356e-05, "loss": 0.2022, "step": 8970 }, { "epoch": 30.93448275862069, "grad_norm": 0.9622078537940979, "learning_rate": 2.9211034482758624e-05, "loss": 0.231, "step": 8971 }, { "epoch": 30.93793103448276, "grad_norm": 0.7876116037368774, "learning_rate": 2.921057471264368e-05, "loss": 0.1966, "step": 8972 }, { "epoch": 30.94137931034483, "grad_norm": 0.9148819446563721, "learning_rate": 2.9210114942528738e-05, "loss": 0.2116, "step": 8973 }, { "epoch": 30.944827586206898, "grad_norm": 1.0139532089233398, "learning_rate": 2.9209655172413793e-05, "loss": 0.1965, "step": 8974 }, { "epoch": 30.948275862068964, "grad_norm": 1.9440702199935913, "learning_rate": 2.9209195402298852e-05, "loss": 0.2601, "step": 8975 }, { "epoch": 30.951724137931034, "grad_norm": 0.7486427426338196, "learning_rate": 2.920873563218391e-05, "loss": 0.2879, "step": 8976 }, { "epoch": 30.955172413793104, "grad_norm": 0.5794453620910645, "learning_rate": 2.9208275862068966e-05, "loss": 0.1957, "step": 8977 }, { "epoch": 30.958620689655174, "grad_norm": 1.0670418739318848, "learning_rate": 2.9207816091954024e-05, "loss": 0.2588, "step": 8978 }, { "epoch": 30.96206896551724, "grad_norm": 0.6758306622505188, "learning_rate": 2.9207356321839083e-05, "loss": 0.2255, "step": 8979 }, { "epoch": 30.96551724137931, "grad_norm": 0.6517006158828735, "learning_rate": 2.920689655172414e-05, "loss": 0.2351, "step": 8980 }, { "epoch": 30.96896551724138, "grad_norm": 0.5052666068077087, "learning_rate": 2.9206436781609197e-05, "loss": 0.2003, "step": 8981 }, { "epoch": 30.97241379310345, "grad_norm": 0.6074002981185913, "learning_rate": 2.9205977011494252e-05, "loss": 0.2157, "step": 8982 }, { "epoch": 30.97586206896552, "grad_norm": 1.0062592029571533, "learning_rate": 2.920551724137931e-05, "loss": 0.1764, "step": 8983 }, { "epoch": 30.979310344827585, "grad_norm": 1.4482295513153076, "learning_rate": 2.920505747126437e-05, "loss": 0.2192, "step": 8984 }, { "epoch": 30.982758620689655, "grad_norm": 0.6412314772605896, "learning_rate": 2.9204597701149425e-05, "loss": 0.2017, "step": 8985 }, { "epoch": 30.986206896551725, "grad_norm": 2.0004239082336426, "learning_rate": 2.9204137931034484e-05, "loss": 0.2138, "step": 8986 }, { "epoch": 30.989655172413794, "grad_norm": 0.883857011795044, "learning_rate": 2.9203678160919542e-05, "loss": 0.1979, "step": 8987 }, { "epoch": 30.99310344827586, "grad_norm": 0.7440073490142822, "learning_rate": 2.9203218390804598e-05, "loss": 0.1662, "step": 8988 }, { "epoch": 30.99655172413793, "grad_norm": 0.8469119668006897, "learning_rate": 2.9202758620689656e-05, "loss": 0.1947, "step": 8989 }, { "epoch": 31.0, "grad_norm": 2.2968568801879883, "learning_rate": 2.920229885057471e-05, "loss": 0.2571, "step": 8990 }, { "epoch": 31.00344827586207, "grad_norm": 1.6033411026000977, "learning_rate": 2.9201839080459774e-05, "loss": 0.281, "step": 8991 }, { "epoch": 31.00689655172414, "grad_norm": 0.7682844996452332, "learning_rate": 2.920137931034483e-05, "loss": 0.2449, "step": 8992 }, { "epoch": 31.010344827586206, "grad_norm": 0.9158478379249573, "learning_rate": 2.9200919540229884e-05, "loss": 0.247, "step": 8993 }, { "epoch": 31.013793103448275, "grad_norm": 0.46892333030700684, "learning_rate": 2.9200459770114943e-05, "loss": 0.2171, "step": 8994 }, { "epoch": 31.017241379310345, "grad_norm": 0.674008846282959, "learning_rate": 2.92e-05, "loss": 0.224, "step": 8995 }, { "epoch": 31.020689655172415, "grad_norm": 0.8634347915649414, "learning_rate": 2.919954022988506e-05, "loss": 0.2104, "step": 8996 }, { "epoch": 31.02413793103448, "grad_norm": 0.6161355376243591, "learning_rate": 2.9199080459770116e-05, "loss": 0.2051, "step": 8997 }, { "epoch": 31.02758620689655, "grad_norm": 1.2493816614151, "learning_rate": 2.919862068965517e-05, "loss": 0.233, "step": 8998 }, { "epoch": 31.03103448275862, "grad_norm": 0.8137596845626831, "learning_rate": 2.9198160919540233e-05, "loss": 0.1902, "step": 8999 }, { "epoch": 31.03448275862069, "grad_norm": 0.6471794247627258, "learning_rate": 2.9197701149425288e-05, "loss": 0.1819, "step": 9000 }, { "epoch": 31.03448275862069, "eval_cer": 0.13340550014175623, "eval_loss": 0.3187183737754822, "eval_runtime": 18.1707, "eval_samples_per_second": 51.016, "eval_steps_per_second": 0.165, "eval_wer": 0.30344202898550726, "step": 9000 }, { "epoch": 31.03793103448276, "grad_norm": 0.7975764274597168, "learning_rate": 2.9197241379310347e-05, "loss": 0.1981, "step": 9001 }, { "epoch": 31.041379310344826, "grad_norm": 0.5648518800735474, "learning_rate": 2.9196781609195402e-05, "loss": 0.1774, "step": 9002 }, { "epoch": 31.044827586206896, "grad_norm": 1.1363807916641235, "learning_rate": 2.919632183908046e-05, "loss": 0.2009, "step": 9003 }, { "epoch": 31.048275862068966, "grad_norm": 0.5132907032966614, "learning_rate": 2.919586206896552e-05, "loss": 0.1899, "step": 9004 }, { "epoch": 31.051724137931036, "grad_norm": 0.8606603741645813, "learning_rate": 2.9195402298850575e-05, "loss": 0.1879, "step": 9005 }, { "epoch": 31.055172413793102, "grad_norm": 0.9631147384643555, "learning_rate": 2.9194942528735634e-05, "loss": 0.1946, "step": 9006 }, { "epoch": 31.05862068965517, "grad_norm": 1.0585651397705078, "learning_rate": 2.9194482758620692e-05, "loss": 0.1923, "step": 9007 }, { "epoch": 31.06206896551724, "grad_norm": 0.6561844944953918, "learning_rate": 2.9194022988505748e-05, "loss": 0.2118, "step": 9008 }, { "epoch": 31.06551724137931, "grad_norm": 0.9308727979660034, "learning_rate": 2.9193563218390806e-05, "loss": 0.1797, "step": 9009 }, { "epoch": 31.06896551724138, "grad_norm": 1.1073622703552246, "learning_rate": 2.919310344827586e-05, "loss": 0.1833, "step": 9010 }, { "epoch": 31.072413793103447, "grad_norm": 0.8551403880119324, "learning_rate": 2.919264367816092e-05, "loss": 0.1738, "step": 9011 }, { "epoch": 31.075862068965517, "grad_norm": 0.8665105104446411, "learning_rate": 2.919218390804598e-05, "loss": 0.1868, "step": 9012 }, { "epoch": 31.079310344827586, "grad_norm": 0.9836547374725342, "learning_rate": 2.9191724137931034e-05, "loss": 0.2132, "step": 9013 }, { "epoch": 31.082758620689656, "grad_norm": 1.207361102104187, "learning_rate": 2.9191264367816093e-05, "loss": 0.2298, "step": 9014 }, { "epoch": 31.086206896551722, "grad_norm": 1.580267310142517, "learning_rate": 2.919080459770115e-05, "loss": 0.2248, "step": 9015 }, { "epoch": 31.089655172413792, "grad_norm": 0.7730626463890076, "learning_rate": 2.9190344827586207e-05, "loss": 0.2947, "step": 9016 }, { "epoch": 31.093103448275862, "grad_norm": 0.8110123872756958, "learning_rate": 2.9189885057471266e-05, "loss": 0.2019, "step": 9017 }, { "epoch": 31.09655172413793, "grad_norm": 0.8207552433013916, "learning_rate": 2.918942528735632e-05, "loss": 0.2162, "step": 9018 }, { "epoch": 31.1, "grad_norm": 1.1304677724838257, "learning_rate": 2.9188965517241383e-05, "loss": 0.2279, "step": 9019 }, { "epoch": 31.103448275862068, "grad_norm": 0.6566921472549438, "learning_rate": 2.9188505747126438e-05, "loss": 0.221, "step": 9020 }, { "epoch": 31.106896551724137, "grad_norm": 0.9944555163383484, "learning_rate": 2.9188045977011494e-05, "loss": 0.2017, "step": 9021 }, { "epoch": 31.110344827586207, "grad_norm": 0.49315664172172546, "learning_rate": 2.9187586206896552e-05, "loss": 0.2089, "step": 9022 }, { "epoch": 31.113793103448277, "grad_norm": 0.6844463348388672, "learning_rate": 2.918712643678161e-05, "loss": 0.2471, "step": 9023 }, { "epoch": 31.117241379310343, "grad_norm": 1.0412535667419434, "learning_rate": 2.918666666666667e-05, "loss": 0.2146, "step": 9024 }, { "epoch": 31.120689655172413, "grad_norm": 1.0365855693817139, "learning_rate": 2.9186206896551725e-05, "loss": 0.2047, "step": 9025 }, { "epoch": 31.124137931034483, "grad_norm": 0.5691492557525635, "learning_rate": 2.918574712643678e-05, "loss": 0.1912, "step": 9026 }, { "epoch": 31.127586206896552, "grad_norm": 0.6478983163833618, "learning_rate": 2.9185287356321842e-05, "loss": 0.2268, "step": 9027 }, { "epoch": 31.131034482758622, "grad_norm": 1.6190807819366455, "learning_rate": 2.9184827586206898e-05, "loss": 0.2008, "step": 9028 }, { "epoch": 31.13448275862069, "grad_norm": 0.9275161027908325, "learning_rate": 2.9184367816091956e-05, "loss": 0.1846, "step": 9029 }, { "epoch": 31.137931034482758, "grad_norm": 1.6858323812484741, "learning_rate": 2.918390804597701e-05, "loss": 0.1993, "step": 9030 }, { "epoch": 31.141379310344828, "grad_norm": 0.6510912179946899, "learning_rate": 2.918344827586207e-05, "loss": 0.1964, "step": 9031 }, { "epoch": 31.144827586206898, "grad_norm": 0.8902737498283386, "learning_rate": 2.918298850574713e-05, "loss": 0.1597, "step": 9032 }, { "epoch": 31.148275862068967, "grad_norm": 0.9879864454269409, "learning_rate": 2.9182528735632184e-05, "loss": 0.1665, "step": 9033 }, { "epoch": 31.151724137931033, "grad_norm": 0.8640586733818054, "learning_rate": 2.9182068965517243e-05, "loss": 0.156, "step": 9034 }, { "epoch": 31.155172413793103, "grad_norm": 1.460316777229309, "learning_rate": 2.91816091954023e-05, "loss": 0.1725, "step": 9035 }, { "epoch": 31.158620689655173, "grad_norm": 0.8942238688468933, "learning_rate": 2.9181149425287357e-05, "loss": 0.1841, "step": 9036 }, { "epoch": 31.162068965517243, "grad_norm": 0.9577195644378662, "learning_rate": 2.9180689655172416e-05, "loss": 0.161, "step": 9037 }, { "epoch": 31.16551724137931, "grad_norm": 0.9303452968597412, "learning_rate": 2.918022988505747e-05, "loss": 0.1798, "step": 9038 }, { "epoch": 31.16896551724138, "grad_norm": 0.8851576447486877, "learning_rate": 2.917977011494253e-05, "loss": 0.2123, "step": 9039 }, { "epoch": 31.17241379310345, "grad_norm": 1.3051408529281616, "learning_rate": 2.9179310344827588e-05, "loss": 0.2972, "step": 9040 }, { "epoch": 31.175862068965518, "grad_norm": 0.7186324596405029, "learning_rate": 2.9178850574712643e-05, "loss": 0.2841, "step": 9041 }, { "epoch": 31.179310344827588, "grad_norm": 0.7035830616950989, "learning_rate": 2.9178390804597702e-05, "loss": 0.2499, "step": 9042 }, { "epoch": 31.182758620689654, "grad_norm": 0.8395261168479919, "learning_rate": 2.917793103448276e-05, "loss": 0.2163, "step": 9043 }, { "epoch": 31.186206896551724, "grad_norm": 0.6418234705924988, "learning_rate": 2.9177471264367816e-05, "loss": 0.2241, "step": 9044 }, { "epoch": 31.189655172413794, "grad_norm": 0.7687892317771912, "learning_rate": 2.9177011494252875e-05, "loss": 0.2264, "step": 9045 }, { "epoch": 31.193103448275863, "grad_norm": 0.9295394420623779, "learning_rate": 2.917655172413793e-05, "loss": 0.2145, "step": 9046 }, { "epoch": 31.19655172413793, "grad_norm": 1.124597430229187, "learning_rate": 2.9176091954022992e-05, "loss": 0.2329, "step": 9047 }, { "epoch": 31.2, "grad_norm": 1.049923062324524, "learning_rate": 2.9175632183908047e-05, "loss": 0.1978, "step": 9048 }, { "epoch": 31.20344827586207, "grad_norm": 1.0907690525054932, "learning_rate": 2.9175172413793103e-05, "loss": 0.2191, "step": 9049 }, { "epoch": 31.20689655172414, "grad_norm": 0.9052553772926331, "learning_rate": 2.917471264367816e-05, "loss": 0.1997, "step": 9050 }, { "epoch": 31.21034482758621, "grad_norm": 0.8534867763519287, "learning_rate": 2.9174252873563217e-05, "loss": 0.1801, "step": 9051 }, { "epoch": 31.213793103448275, "grad_norm": 0.785053014755249, "learning_rate": 2.917379310344828e-05, "loss": 0.2319, "step": 9052 }, { "epoch": 31.217241379310344, "grad_norm": 0.6736972332000732, "learning_rate": 2.9173333333333334e-05, "loss": 0.2134, "step": 9053 }, { "epoch": 31.220689655172414, "grad_norm": 1.2012107372283936, "learning_rate": 2.917287356321839e-05, "loss": 0.1806, "step": 9054 }, { "epoch": 31.224137931034484, "grad_norm": 0.6734849810600281, "learning_rate": 2.9172413793103448e-05, "loss": 0.1827, "step": 9055 }, { "epoch": 31.22758620689655, "grad_norm": 0.6598568558692932, "learning_rate": 2.9171954022988507e-05, "loss": 0.1886, "step": 9056 }, { "epoch": 31.23103448275862, "grad_norm": 0.7723358869552612, "learning_rate": 2.9171494252873565e-05, "loss": 0.1767, "step": 9057 }, { "epoch": 31.23448275862069, "grad_norm": 0.6600731015205383, "learning_rate": 2.917103448275862e-05, "loss": 0.2183, "step": 9058 }, { "epoch": 31.23793103448276, "grad_norm": 0.7734520435333252, "learning_rate": 2.9170574712643676e-05, "loss": 0.202, "step": 9059 }, { "epoch": 31.24137931034483, "grad_norm": 0.8535692691802979, "learning_rate": 2.9170114942528738e-05, "loss": 0.1746, "step": 9060 }, { "epoch": 31.244827586206895, "grad_norm": 0.618579089641571, "learning_rate": 2.9169655172413793e-05, "loss": 0.172, "step": 9061 }, { "epoch": 31.248275862068965, "grad_norm": 1.342378854751587, "learning_rate": 2.9169195402298852e-05, "loss": 0.191, "step": 9062 }, { "epoch": 31.251724137931035, "grad_norm": 1.0556596517562866, "learning_rate": 2.9168735632183907e-05, "loss": 0.2114, "step": 9063 }, { "epoch": 31.255172413793105, "grad_norm": 1.3499572277069092, "learning_rate": 2.9168275862068966e-05, "loss": 0.206, "step": 9064 }, { "epoch": 31.25862068965517, "grad_norm": 1.2298028469085693, "learning_rate": 2.9167816091954025e-05, "loss": 0.2369, "step": 9065 }, { "epoch": 31.26206896551724, "grad_norm": 0.7875101566314697, "learning_rate": 2.916735632183908e-05, "loss": 0.2684, "step": 9066 }, { "epoch": 31.26551724137931, "grad_norm": 0.8465641736984253, "learning_rate": 2.916689655172414e-05, "loss": 0.2542, "step": 9067 }, { "epoch": 31.26896551724138, "grad_norm": 0.8567309975624084, "learning_rate": 2.9166436781609197e-05, "loss": 0.2166, "step": 9068 }, { "epoch": 31.27241379310345, "grad_norm": 0.8562008142471313, "learning_rate": 2.9165977011494253e-05, "loss": 0.227, "step": 9069 }, { "epoch": 31.275862068965516, "grad_norm": 0.686280369758606, "learning_rate": 2.916551724137931e-05, "loss": 0.2444, "step": 9070 }, { "epoch": 31.279310344827586, "grad_norm": 0.7949183583259583, "learning_rate": 2.9165057471264367e-05, "loss": 0.2261, "step": 9071 }, { "epoch": 31.282758620689656, "grad_norm": 0.7927069067955017, "learning_rate": 2.9164597701149425e-05, "loss": 0.2326, "step": 9072 }, { "epoch": 31.286206896551725, "grad_norm": 0.5746904015541077, "learning_rate": 2.9164137931034484e-05, "loss": 0.1895, "step": 9073 }, { "epoch": 31.28965517241379, "grad_norm": 0.8198239207267761, "learning_rate": 2.916367816091954e-05, "loss": 0.19, "step": 9074 }, { "epoch": 31.29310344827586, "grad_norm": 1.1731150150299072, "learning_rate": 2.9163218390804598e-05, "loss": 0.2104, "step": 9075 }, { "epoch": 31.29655172413793, "grad_norm": 2.3221983909606934, "learning_rate": 2.9162758620689657e-05, "loss": 0.199, "step": 9076 }, { "epoch": 31.3, "grad_norm": 0.7656028866767883, "learning_rate": 2.9162298850574712e-05, "loss": 0.219, "step": 9077 }, { "epoch": 31.30344827586207, "grad_norm": 0.5477542281150818, "learning_rate": 2.916183908045977e-05, "loss": 0.1782, "step": 9078 }, { "epoch": 31.306896551724137, "grad_norm": 0.49029481410980225, "learning_rate": 2.9161379310344826e-05, "loss": 0.1845, "step": 9079 }, { "epoch": 31.310344827586206, "grad_norm": 1.0534087419509888, "learning_rate": 2.9160919540229888e-05, "loss": 0.2097, "step": 9080 }, { "epoch": 31.313793103448276, "grad_norm": 0.6491628885269165, "learning_rate": 2.9160459770114943e-05, "loss": 0.1587, "step": 9081 }, { "epoch": 31.317241379310346, "grad_norm": 1.297947645187378, "learning_rate": 2.916e-05, "loss": 0.1954, "step": 9082 }, { "epoch": 31.320689655172412, "grad_norm": 0.5680183172225952, "learning_rate": 2.9159540229885057e-05, "loss": 0.1621, "step": 9083 }, { "epoch": 31.324137931034482, "grad_norm": 0.68705153465271, "learning_rate": 2.9159080459770116e-05, "loss": 0.1746, "step": 9084 }, { "epoch": 31.32758620689655, "grad_norm": 0.6904117465019226, "learning_rate": 2.9158620689655175e-05, "loss": 0.1746, "step": 9085 }, { "epoch": 31.33103448275862, "grad_norm": 1.3224886655807495, "learning_rate": 2.915816091954023e-05, "loss": 0.1821, "step": 9086 }, { "epoch": 31.33448275862069, "grad_norm": 0.6316081881523132, "learning_rate": 2.9157701149425285e-05, "loss": 0.1699, "step": 9087 }, { "epoch": 31.337931034482757, "grad_norm": 1.1902656555175781, "learning_rate": 2.9157241379310347e-05, "loss": 0.1677, "step": 9088 }, { "epoch": 31.341379310344827, "grad_norm": 0.7705562710762024, "learning_rate": 2.9156781609195403e-05, "loss": 0.1847, "step": 9089 }, { "epoch": 31.344827586206897, "grad_norm": 1.1939457654953003, "learning_rate": 2.915632183908046e-05, "loss": 0.2313, "step": 9090 }, { "epoch": 31.348275862068967, "grad_norm": 0.4801020622253418, "learning_rate": 2.9155862068965517e-05, "loss": 0.2554, "step": 9091 }, { "epoch": 31.351724137931033, "grad_norm": 0.5452142953872681, "learning_rate": 2.9155402298850575e-05, "loss": 0.2384, "step": 9092 }, { "epoch": 31.355172413793102, "grad_norm": 0.5548989772796631, "learning_rate": 2.9154942528735634e-05, "loss": 0.2726, "step": 9093 }, { "epoch": 31.358620689655172, "grad_norm": 0.7630265951156616, "learning_rate": 2.915448275862069e-05, "loss": 0.1948, "step": 9094 }, { "epoch": 31.362068965517242, "grad_norm": 0.5844305157661438, "learning_rate": 2.9154022988505748e-05, "loss": 0.2095, "step": 9095 }, { "epoch": 31.36551724137931, "grad_norm": 0.8201321959495544, "learning_rate": 2.9153563218390807e-05, "loss": 0.2106, "step": 9096 }, { "epoch": 31.368965517241378, "grad_norm": 0.5345842838287354, "learning_rate": 2.9153103448275862e-05, "loss": 0.2044, "step": 9097 }, { "epoch": 31.372413793103448, "grad_norm": 0.6358880996704102, "learning_rate": 2.915264367816092e-05, "loss": 0.2009, "step": 9098 }, { "epoch": 31.375862068965517, "grad_norm": 0.591245174407959, "learning_rate": 2.9152183908045976e-05, "loss": 0.2059, "step": 9099 }, { "epoch": 31.379310344827587, "grad_norm": 0.7604411244392395, "learning_rate": 2.9151724137931035e-05, "loss": 0.212, "step": 9100 }, { "epoch": 31.382758620689657, "grad_norm": 1.596741795539856, "learning_rate": 2.9151264367816093e-05, "loss": 0.2014, "step": 9101 }, { "epoch": 31.386206896551723, "grad_norm": 0.572734534740448, "learning_rate": 2.915080459770115e-05, "loss": 0.2021, "step": 9102 }, { "epoch": 31.389655172413793, "grad_norm": 0.7633506059646606, "learning_rate": 2.9150344827586207e-05, "loss": 0.1989, "step": 9103 }, { "epoch": 31.393103448275863, "grad_norm": 0.6133649945259094, "learning_rate": 2.9149885057471266e-05, "loss": 0.2188, "step": 9104 }, { "epoch": 31.396551724137932, "grad_norm": 1.1297097206115723, "learning_rate": 2.914942528735632e-05, "loss": 0.1826, "step": 9105 }, { "epoch": 31.4, "grad_norm": 0.7036461234092712, "learning_rate": 2.914896551724138e-05, "loss": 0.1993, "step": 9106 }, { "epoch": 31.40344827586207, "grad_norm": 0.8292080760002136, "learning_rate": 2.9148505747126435e-05, "loss": 0.2097, "step": 9107 }, { "epoch": 31.406896551724138, "grad_norm": 1.1585500240325928, "learning_rate": 2.9148045977011497e-05, "loss": 0.2062, "step": 9108 }, { "epoch": 31.410344827586208, "grad_norm": 0.6126819252967834, "learning_rate": 2.9147586206896553e-05, "loss": 0.1963, "step": 9109 }, { "epoch": 31.413793103448278, "grad_norm": 0.8674826622009277, "learning_rate": 2.9147126436781608e-05, "loss": 0.1714, "step": 9110 }, { "epoch": 31.417241379310344, "grad_norm": 0.7326771020889282, "learning_rate": 2.9146666666666667e-05, "loss": 0.21, "step": 9111 }, { "epoch": 31.420689655172414, "grad_norm": 1.4284727573394775, "learning_rate": 2.9146206896551725e-05, "loss": 0.179, "step": 9112 }, { "epoch": 31.424137931034483, "grad_norm": 0.8778664469718933, "learning_rate": 2.9145747126436784e-05, "loss": 0.1901, "step": 9113 }, { "epoch": 31.427586206896553, "grad_norm": 0.9304081797599792, "learning_rate": 2.914528735632184e-05, "loss": 0.1732, "step": 9114 }, { "epoch": 31.43103448275862, "grad_norm": 0.9724273085594177, "learning_rate": 2.9144827586206895e-05, "loss": 0.2455, "step": 9115 }, { "epoch": 31.43448275862069, "grad_norm": 0.7150082588195801, "learning_rate": 2.9144367816091957e-05, "loss": 0.2682, "step": 9116 }, { "epoch": 31.43793103448276, "grad_norm": 0.5860435366630554, "learning_rate": 2.9143908045977012e-05, "loss": 0.2331, "step": 9117 }, { "epoch": 31.44137931034483, "grad_norm": 0.5817816853523254, "learning_rate": 2.914344827586207e-05, "loss": 0.2188, "step": 9118 }, { "epoch": 31.444827586206898, "grad_norm": 0.9741053581237793, "learning_rate": 2.9142988505747126e-05, "loss": 0.2147, "step": 9119 }, { "epoch": 31.448275862068964, "grad_norm": 0.7753375768661499, "learning_rate": 2.9142528735632185e-05, "loss": 0.2157, "step": 9120 }, { "epoch": 31.451724137931034, "grad_norm": 0.7889776229858398, "learning_rate": 2.9142068965517243e-05, "loss": 0.2102, "step": 9121 }, { "epoch": 31.455172413793104, "grad_norm": 0.8492022156715393, "learning_rate": 2.91416091954023e-05, "loss": 0.2315, "step": 9122 }, { "epoch": 31.458620689655174, "grad_norm": 1.922889232635498, "learning_rate": 2.9141149425287357e-05, "loss": 0.1939, "step": 9123 }, { "epoch": 31.46206896551724, "grad_norm": 0.8943997025489807, "learning_rate": 2.9140689655172416e-05, "loss": 0.1895, "step": 9124 }, { "epoch": 31.46551724137931, "grad_norm": 0.5311704874038696, "learning_rate": 2.914022988505747e-05, "loss": 0.1821, "step": 9125 }, { "epoch": 31.46896551724138, "grad_norm": 0.8188589215278625, "learning_rate": 2.913977011494253e-05, "loss": 0.2097, "step": 9126 }, { "epoch": 31.47241379310345, "grad_norm": 0.7429159283638, "learning_rate": 2.9139310344827585e-05, "loss": 0.1801, "step": 9127 }, { "epoch": 31.47586206896552, "grad_norm": 0.6181626915931702, "learning_rate": 2.9138850574712644e-05, "loss": 0.2121, "step": 9128 }, { "epoch": 31.479310344827585, "grad_norm": 0.6074782609939575, "learning_rate": 2.9138390804597703e-05, "loss": 0.1746, "step": 9129 }, { "epoch": 31.482758620689655, "grad_norm": 1.1778755187988281, "learning_rate": 2.9137931034482758e-05, "loss": 0.2009, "step": 9130 }, { "epoch": 31.486206896551725, "grad_norm": 0.6735582947731018, "learning_rate": 2.9137471264367817e-05, "loss": 0.1729, "step": 9131 }, { "epoch": 31.489655172413794, "grad_norm": 1.074430227279663, "learning_rate": 2.9137011494252875e-05, "loss": 0.1929, "step": 9132 }, { "epoch": 31.49310344827586, "grad_norm": 2.3830156326293945, "learning_rate": 2.913655172413793e-05, "loss": 0.183, "step": 9133 }, { "epoch": 31.49655172413793, "grad_norm": 0.9035771489143372, "learning_rate": 2.913609195402299e-05, "loss": 0.1969, "step": 9134 }, { "epoch": 31.5, "grad_norm": 0.8801381587982178, "learning_rate": 2.9135632183908044e-05, "loss": 0.2032, "step": 9135 }, { "epoch": 31.50344827586207, "grad_norm": 0.7187477350234985, "learning_rate": 2.9135172413793107e-05, "loss": 0.176, "step": 9136 }, { "epoch": 31.50689655172414, "grad_norm": 0.8311159014701843, "learning_rate": 2.9134712643678162e-05, "loss": 0.1672, "step": 9137 }, { "epoch": 31.510344827586206, "grad_norm": 4.7187275886535645, "learning_rate": 2.9134252873563217e-05, "loss": 0.1762, "step": 9138 }, { "epoch": 31.513793103448275, "grad_norm": 0.9831600189208984, "learning_rate": 2.9133793103448276e-05, "loss": 0.1891, "step": 9139 }, { "epoch": 31.517241379310345, "grad_norm": 1.3953535556793213, "learning_rate": 2.9133333333333334e-05, "loss": 0.2435, "step": 9140 }, { "epoch": 31.520689655172415, "grad_norm": 0.7277474403381348, "learning_rate": 2.9132873563218393e-05, "loss": 0.2763, "step": 9141 }, { "epoch": 31.52413793103448, "grad_norm": 0.9509769082069397, "learning_rate": 2.913241379310345e-05, "loss": 0.2207, "step": 9142 }, { "epoch": 31.52758620689655, "grad_norm": 1.357343316078186, "learning_rate": 2.9131954022988504e-05, "loss": 0.2233, "step": 9143 }, { "epoch": 31.53103448275862, "grad_norm": 1.0554805994033813, "learning_rate": 2.9131494252873566e-05, "loss": 0.2122, "step": 9144 }, { "epoch": 31.53448275862069, "grad_norm": 0.8440026640892029, "learning_rate": 2.913103448275862e-05, "loss": 0.2195, "step": 9145 }, { "epoch": 31.53793103448276, "grad_norm": 1.4648282527923584, "learning_rate": 2.913057471264368e-05, "loss": 0.2105, "step": 9146 }, { "epoch": 31.541379310344826, "grad_norm": 1.057260513305664, "learning_rate": 2.9130114942528735e-05, "loss": 0.211, "step": 9147 }, { "epoch": 31.544827586206896, "grad_norm": 1.0992907285690308, "learning_rate": 2.9129655172413794e-05, "loss": 0.2337, "step": 9148 }, { "epoch": 31.548275862068966, "grad_norm": 0.568139910697937, "learning_rate": 2.9129195402298852e-05, "loss": 0.1872, "step": 9149 }, { "epoch": 31.551724137931036, "grad_norm": 0.540550947189331, "learning_rate": 2.9128735632183908e-05, "loss": 0.211, "step": 9150 }, { "epoch": 31.555172413793102, "grad_norm": 0.8271404504776001, "learning_rate": 2.9128275862068966e-05, "loss": 0.2089, "step": 9151 }, { "epoch": 31.55862068965517, "grad_norm": 0.7933440208435059, "learning_rate": 2.9127816091954025e-05, "loss": 0.1968, "step": 9152 }, { "epoch": 31.56206896551724, "grad_norm": 1.1954914331436157, "learning_rate": 2.912735632183908e-05, "loss": 0.1921, "step": 9153 }, { "epoch": 31.56551724137931, "grad_norm": 0.6992223262786865, "learning_rate": 2.912689655172414e-05, "loss": 0.2043, "step": 9154 }, { "epoch": 31.56896551724138, "grad_norm": 0.626611590385437, "learning_rate": 2.9126436781609194e-05, "loss": 0.2179, "step": 9155 }, { "epoch": 31.572413793103447, "grad_norm": 2.4052329063415527, "learning_rate": 2.9125977011494253e-05, "loss": 0.1881, "step": 9156 }, { "epoch": 31.575862068965517, "grad_norm": 0.811055064201355, "learning_rate": 2.9125517241379312e-05, "loss": 0.1993, "step": 9157 }, { "epoch": 31.579310344827586, "grad_norm": 0.6470239758491516, "learning_rate": 2.9125057471264367e-05, "loss": 0.2194, "step": 9158 }, { "epoch": 31.582758620689656, "grad_norm": 0.8397538065910339, "learning_rate": 2.9124597701149426e-05, "loss": 0.2015, "step": 9159 }, { "epoch": 31.586206896551722, "grad_norm": 0.6602575778961182, "learning_rate": 2.9124137931034484e-05, "loss": 0.1786, "step": 9160 }, { "epoch": 31.589655172413792, "grad_norm": 1.2697054147720337, "learning_rate": 2.912367816091954e-05, "loss": 0.1763, "step": 9161 }, { "epoch": 31.593103448275862, "grad_norm": 0.9888803958892822, "learning_rate": 2.91232183908046e-05, "loss": 0.1939, "step": 9162 }, { "epoch": 31.59655172413793, "grad_norm": 0.8314052224159241, "learning_rate": 2.9122758620689654e-05, "loss": 0.1581, "step": 9163 }, { "epoch": 31.6, "grad_norm": 1.5839649438858032, "learning_rate": 2.9122298850574716e-05, "loss": 0.1957, "step": 9164 }, { "epoch": 31.603448275862068, "grad_norm": 1.8257949352264404, "learning_rate": 2.912183908045977e-05, "loss": 0.2731, "step": 9165 }, { "epoch": 31.606896551724137, "grad_norm": 0.5351074934005737, "learning_rate": 2.9121379310344826e-05, "loss": 0.2694, "step": 9166 }, { "epoch": 31.610344827586207, "grad_norm": 0.7545520067214966, "learning_rate": 2.9120919540229885e-05, "loss": 0.2322, "step": 9167 }, { "epoch": 31.613793103448277, "grad_norm": 0.7911497950553894, "learning_rate": 2.9120459770114944e-05, "loss": 0.2179, "step": 9168 }, { "epoch": 31.617241379310343, "grad_norm": 0.9675735831260681, "learning_rate": 2.9120000000000002e-05, "loss": 0.2271, "step": 9169 }, { "epoch": 31.620689655172413, "grad_norm": 1.9580886363983154, "learning_rate": 2.9119540229885058e-05, "loss": 0.1822, "step": 9170 }, { "epoch": 31.624137931034483, "grad_norm": 0.5204646587371826, "learning_rate": 2.9119080459770113e-05, "loss": 0.1927, "step": 9171 }, { "epoch": 31.627586206896552, "grad_norm": 0.5125941634178162, "learning_rate": 2.9118620689655175e-05, "loss": 0.1977, "step": 9172 }, { "epoch": 31.631034482758622, "grad_norm": 0.8592856526374817, "learning_rate": 2.911816091954023e-05, "loss": 0.2313, "step": 9173 }, { "epoch": 31.63448275862069, "grad_norm": 1.021958351135254, "learning_rate": 2.911770114942529e-05, "loss": 0.2408, "step": 9174 }, { "epoch": 31.637931034482758, "grad_norm": 1.2649706602096558, "learning_rate": 2.9117241379310344e-05, "loss": 0.2009, "step": 9175 }, { "epoch": 31.641379310344828, "grad_norm": 0.7404921650886536, "learning_rate": 2.9116781609195403e-05, "loss": 0.2265, "step": 9176 }, { "epoch": 31.644827586206898, "grad_norm": 1.2051349878311157, "learning_rate": 2.9116321839080462e-05, "loss": 0.2021, "step": 9177 }, { "epoch": 31.648275862068964, "grad_norm": 1.2091715335845947, "learning_rate": 2.9115862068965517e-05, "loss": 0.1862, "step": 9178 }, { "epoch": 31.651724137931033, "grad_norm": 0.8483291864395142, "learning_rate": 2.9115402298850576e-05, "loss": 0.1975, "step": 9179 }, { "epoch": 31.655172413793103, "grad_norm": 0.6722620129585266, "learning_rate": 2.9114942528735634e-05, "loss": 0.1837, "step": 9180 }, { "epoch": 31.658620689655173, "grad_norm": 1.4093706607818604, "learning_rate": 2.911448275862069e-05, "loss": 0.193, "step": 9181 }, { "epoch": 31.662068965517243, "grad_norm": 2.3791911602020264, "learning_rate": 2.911402298850575e-05, "loss": 0.1867, "step": 9182 }, { "epoch": 31.66551724137931, "grad_norm": 1.4362162351608276, "learning_rate": 2.9113563218390804e-05, "loss": 0.1892, "step": 9183 }, { "epoch": 31.66896551724138, "grad_norm": 1.262203574180603, "learning_rate": 2.9113103448275862e-05, "loss": 0.1872, "step": 9184 }, { "epoch": 31.67241379310345, "grad_norm": 1.7604271173477173, "learning_rate": 2.911264367816092e-05, "loss": 0.1923, "step": 9185 }, { "epoch": 31.675862068965518, "grad_norm": 1.0771105289459229, "learning_rate": 2.9112183908045976e-05, "loss": 0.1868, "step": 9186 }, { "epoch": 31.679310344827588, "grad_norm": 0.8779042363166809, "learning_rate": 2.9111724137931035e-05, "loss": 0.2145, "step": 9187 }, { "epoch": 31.682758620689654, "grad_norm": 1.008802890777588, "learning_rate": 2.9111264367816094e-05, "loss": 0.1775, "step": 9188 }, { "epoch": 31.686206896551724, "grad_norm": 1.3151097297668457, "learning_rate": 2.911080459770115e-05, "loss": 0.243, "step": 9189 }, { "epoch": 31.689655172413794, "grad_norm": 1.2163472175598145, "learning_rate": 2.9110344827586208e-05, "loss": 0.2322, "step": 9190 }, { "epoch": 31.693103448275863, "grad_norm": 0.909041702747345, "learning_rate": 2.9109885057471263e-05, "loss": 0.2949, "step": 9191 }, { "epoch": 31.69655172413793, "grad_norm": 1.073359727859497, "learning_rate": 2.9109425287356325e-05, "loss": 0.2542, "step": 9192 }, { "epoch": 31.7, "grad_norm": 1.3511426448822021, "learning_rate": 2.910896551724138e-05, "loss": 0.2353, "step": 9193 }, { "epoch": 31.70344827586207, "grad_norm": 1.2039899826049805, "learning_rate": 2.9108505747126436e-05, "loss": 0.2463, "step": 9194 }, { "epoch": 31.70689655172414, "grad_norm": 1.1834625005722046, "learning_rate": 2.9108045977011494e-05, "loss": 0.2261, "step": 9195 }, { "epoch": 31.71034482758621, "grad_norm": 1.1829406023025513, "learning_rate": 2.9107586206896553e-05, "loss": 0.2078, "step": 9196 }, { "epoch": 31.713793103448275, "grad_norm": 0.786767303943634, "learning_rate": 2.910712643678161e-05, "loss": 0.2228, "step": 9197 }, { "epoch": 31.717241379310344, "grad_norm": 0.9926084876060486, "learning_rate": 2.9106666666666667e-05, "loss": 0.2113, "step": 9198 }, { "epoch": 31.720689655172414, "grad_norm": 1.1210691928863525, "learning_rate": 2.9106206896551722e-05, "loss": 0.199, "step": 9199 }, { "epoch": 31.724137931034484, "grad_norm": 0.9526610374450684, "learning_rate": 2.9105747126436784e-05, "loss": 0.2009, "step": 9200 }, { "epoch": 31.72758620689655, "grad_norm": 0.8696818351745605, "learning_rate": 2.910528735632184e-05, "loss": 0.1908, "step": 9201 }, { "epoch": 31.73103448275862, "grad_norm": 0.6560194492340088, "learning_rate": 2.9104827586206898e-05, "loss": 0.2048, "step": 9202 }, { "epoch": 31.73448275862069, "grad_norm": 0.7419978380203247, "learning_rate": 2.9104367816091954e-05, "loss": 0.2123, "step": 9203 }, { "epoch": 31.73793103448276, "grad_norm": 0.7485631704330444, "learning_rate": 2.9103908045977012e-05, "loss": 0.1926, "step": 9204 }, { "epoch": 31.74137931034483, "grad_norm": 1.2929327487945557, "learning_rate": 2.910344827586207e-05, "loss": 0.2201, "step": 9205 }, { "epoch": 31.744827586206895, "grad_norm": 0.7122382521629333, "learning_rate": 2.9102988505747126e-05, "loss": 0.2092, "step": 9206 }, { "epoch": 31.748275862068965, "grad_norm": 0.8883162140846252, "learning_rate": 2.9102528735632185e-05, "loss": 0.1831, "step": 9207 }, { "epoch": 31.751724137931035, "grad_norm": 1.3620537519454956, "learning_rate": 2.9102068965517244e-05, "loss": 0.2002, "step": 9208 }, { "epoch": 31.755172413793105, "grad_norm": 1.6075636148452759, "learning_rate": 2.91016091954023e-05, "loss": 0.1758, "step": 9209 }, { "epoch": 31.75862068965517, "grad_norm": 0.9116566181182861, "learning_rate": 2.9101149425287358e-05, "loss": 0.2225, "step": 9210 }, { "epoch": 31.76206896551724, "grad_norm": 0.9894418716430664, "learning_rate": 2.9100689655172413e-05, "loss": 0.1982, "step": 9211 }, { "epoch": 31.76551724137931, "grad_norm": 1.0580153465270996, "learning_rate": 2.9100229885057475e-05, "loss": 0.1843, "step": 9212 }, { "epoch": 31.76896551724138, "grad_norm": 1.3247480392456055, "learning_rate": 2.909977011494253e-05, "loss": 0.179, "step": 9213 }, { "epoch": 31.77241379310345, "grad_norm": 1.0629276037216187, "learning_rate": 2.9099310344827586e-05, "loss": 0.1862, "step": 9214 }, { "epoch": 31.775862068965516, "grad_norm": 1.3166168928146362, "learning_rate": 2.9098850574712644e-05, "loss": 0.2857, "step": 9215 }, { "epoch": 31.779310344827586, "grad_norm": 1.0339090824127197, "learning_rate": 2.9098390804597703e-05, "loss": 0.2498, "step": 9216 }, { "epoch": 31.782758620689656, "grad_norm": 0.8398151993751526, "learning_rate": 2.9097931034482758e-05, "loss": 0.2432, "step": 9217 }, { "epoch": 31.786206896551725, "grad_norm": 0.5004904270172119, "learning_rate": 2.9097471264367817e-05, "loss": 0.2349, "step": 9218 }, { "epoch": 31.78965517241379, "grad_norm": 0.6397691965103149, "learning_rate": 2.9097011494252872e-05, "loss": 0.2413, "step": 9219 }, { "epoch": 31.79310344827586, "grad_norm": 0.804019033908844, "learning_rate": 2.9096551724137934e-05, "loss": 0.2393, "step": 9220 }, { "epoch": 31.79655172413793, "grad_norm": 0.6845957636833191, "learning_rate": 2.909609195402299e-05, "loss": 0.242, "step": 9221 }, { "epoch": 31.8, "grad_norm": 0.7254197597503662, "learning_rate": 2.9095632183908045e-05, "loss": 0.2416, "step": 9222 }, { "epoch": 31.80344827586207, "grad_norm": 0.5260263085365295, "learning_rate": 2.9095172413793104e-05, "loss": 0.2029, "step": 9223 }, { "epoch": 31.806896551724137, "grad_norm": 1.8257062435150146, "learning_rate": 2.9094712643678162e-05, "loss": 0.193, "step": 9224 }, { "epoch": 31.810344827586206, "grad_norm": 0.7957234978675842, "learning_rate": 2.909425287356322e-05, "loss": 0.2001, "step": 9225 }, { "epoch": 31.813793103448276, "grad_norm": 0.8612109422683716, "learning_rate": 2.9093793103448276e-05, "loss": 0.1896, "step": 9226 }, { "epoch": 31.817241379310346, "grad_norm": 0.6882291436195374, "learning_rate": 2.909333333333333e-05, "loss": 0.1988, "step": 9227 }, { "epoch": 31.820689655172412, "grad_norm": 4.32401180267334, "learning_rate": 2.9092873563218394e-05, "loss": 0.2291, "step": 9228 }, { "epoch": 31.824137931034482, "grad_norm": 0.9017851948738098, "learning_rate": 2.909241379310345e-05, "loss": 0.2101, "step": 9229 }, { "epoch": 31.82758620689655, "grad_norm": 0.7663598656654358, "learning_rate": 2.9091954022988508e-05, "loss": 0.2236, "step": 9230 }, { "epoch": 31.83103448275862, "grad_norm": 0.9390891194343567, "learning_rate": 2.9091494252873563e-05, "loss": 0.2263, "step": 9231 }, { "epoch": 31.83448275862069, "grad_norm": 0.8501238822937012, "learning_rate": 2.909103448275862e-05, "loss": 0.1845, "step": 9232 }, { "epoch": 31.837931034482757, "grad_norm": 2.116063117980957, "learning_rate": 2.909057471264368e-05, "loss": 0.1862, "step": 9233 }, { "epoch": 31.841379310344827, "grad_norm": 1.1394318342208862, "learning_rate": 2.9090114942528735e-05, "loss": 0.2067, "step": 9234 }, { "epoch": 31.844827586206897, "grad_norm": 0.6084781289100647, "learning_rate": 2.9089655172413794e-05, "loss": 0.1851, "step": 9235 }, { "epoch": 31.848275862068967, "grad_norm": 1.2749007940292358, "learning_rate": 2.9089195402298853e-05, "loss": 0.216, "step": 9236 }, { "epoch": 31.851724137931036, "grad_norm": 1.2515087127685547, "learning_rate": 2.9088735632183908e-05, "loss": 0.1767, "step": 9237 }, { "epoch": 31.855172413793102, "grad_norm": 2.235698699951172, "learning_rate": 2.9088275862068967e-05, "loss": 0.1985, "step": 9238 }, { "epoch": 31.858620689655172, "grad_norm": 2.024125337600708, "learning_rate": 2.9087816091954022e-05, "loss": 0.2318, "step": 9239 }, { "epoch": 31.862068965517242, "grad_norm": 2.004445791244507, "learning_rate": 2.9087356321839084e-05, "loss": 0.2927, "step": 9240 }, { "epoch": 31.86551724137931, "grad_norm": 1.0538582801818848, "learning_rate": 2.908689655172414e-05, "loss": 0.2295, "step": 9241 }, { "epoch": 31.868965517241378, "grad_norm": 0.6127817630767822, "learning_rate": 2.9086436781609195e-05, "loss": 0.2336, "step": 9242 }, { "epoch": 31.872413793103448, "grad_norm": 0.7370874285697937, "learning_rate": 2.9085977011494253e-05, "loss": 0.2558, "step": 9243 }, { "epoch": 31.875862068965517, "grad_norm": 1.1077499389648438, "learning_rate": 2.9085517241379312e-05, "loss": 0.207, "step": 9244 }, { "epoch": 31.879310344827587, "grad_norm": 1.6749738454818726, "learning_rate": 2.9085057471264367e-05, "loss": 0.2196, "step": 9245 }, { "epoch": 31.882758620689657, "grad_norm": 0.657370924949646, "learning_rate": 2.9084597701149426e-05, "loss": 0.2228, "step": 9246 }, { "epoch": 31.886206896551723, "grad_norm": 0.5046843886375427, "learning_rate": 2.908413793103448e-05, "loss": 0.2088, "step": 9247 }, { "epoch": 31.889655172413793, "grad_norm": 0.8538951873779297, "learning_rate": 2.9083678160919543e-05, "loss": 0.2372, "step": 9248 }, { "epoch": 31.893103448275863, "grad_norm": 0.6142867207527161, "learning_rate": 2.90832183908046e-05, "loss": 0.1886, "step": 9249 }, { "epoch": 31.896551724137932, "grad_norm": 1.167072057723999, "learning_rate": 2.9082758620689654e-05, "loss": 0.1928, "step": 9250 }, { "epoch": 31.9, "grad_norm": 1.055858850479126, "learning_rate": 2.9082298850574713e-05, "loss": 0.2098, "step": 9251 }, { "epoch": 31.90344827586207, "grad_norm": 0.8354953527450562, "learning_rate": 2.908183908045977e-05, "loss": 0.203, "step": 9252 }, { "epoch": 31.906896551724138, "grad_norm": 0.6298380494117737, "learning_rate": 2.908137931034483e-05, "loss": 0.2003, "step": 9253 }, { "epoch": 31.910344827586208, "grad_norm": 0.9869764447212219, "learning_rate": 2.9080919540229885e-05, "loss": 0.1931, "step": 9254 }, { "epoch": 31.913793103448278, "grad_norm": 1.4037564992904663, "learning_rate": 2.908045977011494e-05, "loss": 0.209, "step": 9255 }, { "epoch": 31.917241379310344, "grad_norm": 0.5574031472206116, "learning_rate": 2.9080000000000003e-05, "loss": 0.1961, "step": 9256 }, { "epoch": 31.920689655172414, "grad_norm": 0.6671339869499207, "learning_rate": 2.9079540229885058e-05, "loss": 0.2041, "step": 9257 }, { "epoch": 31.924137931034483, "grad_norm": 1.2319304943084717, "learning_rate": 2.9079080459770117e-05, "loss": 0.211, "step": 9258 }, { "epoch": 31.927586206896553, "grad_norm": 1.0458396673202515, "learning_rate": 2.9078620689655172e-05, "loss": 0.1824, "step": 9259 }, { "epoch": 31.93103448275862, "grad_norm": 0.6420140266418457, "learning_rate": 2.907816091954023e-05, "loss": 0.2084, "step": 9260 }, { "epoch": 31.93448275862069, "grad_norm": 0.6985346674919128, "learning_rate": 2.907770114942529e-05, "loss": 0.1658, "step": 9261 }, { "epoch": 31.93793103448276, "grad_norm": 1.5254524946212769, "learning_rate": 2.9077241379310345e-05, "loss": 0.1734, "step": 9262 }, { "epoch": 31.94137931034483, "grad_norm": 0.7172805666923523, "learning_rate": 2.9076781609195403e-05, "loss": 0.174, "step": 9263 }, { "epoch": 31.944827586206898, "grad_norm": 1.5204192399978638, "learning_rate": 2.9076321839080462e-05, "loss": 0.1728, "step": 9264 }, { "epoch": 31.948275862068964, "grad_norm": 3.1639745235443115, "learning_rate": 2.9075862068965517e-05, "loss": 0.2486, "step": 9265 }, { "epoch": 31.951724137931034, "grad_norm": 0.7205151319503784, "learning_rate": 2.9075402298850576e-05, "loss": 0.2374, "step": 9266 }, { "epoch": 31.955172413793104, "grad_norm": 0.6615118384361267, "learning_rate": 2.907494252873563e-05, "loss": 0.2382, "step": 9267 }, { "epoch": 31.958620689655174, "grad_norm": 0.6549364924430847, "learning_rate": 2.9074482758620693e-05, "loss": 0.2289, "step": 9268 }, { "epoch": 31.96206896551724, "grad_norm": 1.270033359527588, "learning_rate": 2.907402298850575e-05, "loss": 0.2488, "step": 9269 }, { "epoch": 31.96551724137931, "grad_norm": 0.8199211955070496, "learning_rate": 2.9073563218390804e-05, "loss": 0.2135, "step": 9270 }, { "epoch": 31.96896551724138, "grad_norm": 0.6463923454284668, "learning_rate": 2.9073103448275863e-05, "loss": 0.2248, "step": 9271 }, { "epoch": 31.97241379310345, "grad_norm": 0.64622962474823, "learning_rate": 2.907264367816092e-05, "loss": 0.2016, "step": 9272 }, { "epoch": 31.97586206896552, "grad_norm": 0.637248158454895, "learning_rate": 2.9072183908045977e-05, "loss": 0.1977, "step": 9273 }, { "epoch": 31.979310344827585, "grad_norm": 0.7458120584487915, "learning_rate": 2.9071724137931035e-05, "loss": 0.178, "step": 9274 }, { "epoch": 31.982758620689655, "grad_norm": 1.0046128034591675, "learning_rate": 2.907126436781609e-05, "loss": 0.1753, "step": 9275 }, { "epoch": 31.986206896551725, "grad_norm": 0.722682535648346, "learning_rate": 2.9070804597701153e-05, "loss": 0.198, "step": 9276 }, { "epoch": 31.989655172413794, "grad_norm": 0.6199116706848145, "learning_rate": 2.9070344827586208e-05, "loss": 0.1853, "step": 9277 }, { "epoch": 31.99310344827586, "grad_norm": 2.259931802749634, "learning_rate": 2.9069885057471263e-05, "loss": 0.1789, "step": 9278 }, { "epoch": 31.99655172413793, "grad_norm": 4.735611915588379, "learning_rate": 2.9069425287356322e-05, "loss": 0.1886, "step": 9279 }, { "epoch": 32.0, "grad_norm": 1.001079797744751, "learning_rate": 2.906896551724138e-05, "loss": 0.2674, "step": 9280 }, { "epoch": 32.00344827586207, "grad_norm": 0.7680208683013916, "learning_rate": 2.906850574712644e-05, "loss": 0.249, "step": 9281 }, { "epoch": 32.00689655172414, "grad_norm": 1.3584803342819214, "learning_rate": 2.9068045977011495e-05, "loss": 0.268, "step": 9282 }, { "epoch": 32.01034482758621, "grad_norm": 0.9722003936767578, "learning_rate": 2.906758620689655e-05, "loss": 0.244, "step": 9283 }, { "epoch": 32.01379310344828, "grad_norm": 0.6446970105171204, "learning_rate": 2.9067126436781612e-05, "loss": 0.2104, "step": 9284 }, { "epoch": 32.01724137931034, "grad_norm": 0.6231624484062195, "learning_rate": 2.9066666666666667e-05, "loss": 0.193, "step": 9285 }, { "epoch": 32.02068965517241, "grad_norm": 0.8282118439674377, "learning_rate": 2.9066206896551726e-05, "loss": 0.1908, "step": 9286 }, { "epoch": 32.02413793103448, "grad_norm": 0.7866252660751343, "learning_rate": 2.906574712643678e-05, "loss": 0.2277, "step": 9287 }, { "epoch": 32.02758620689655, "grad_norm": 0.651033341884613, "learning_rate": 2.906528735632184e-05, "loss": 0.1806, "step": 9288 }, { "epoch": 32.03103448275862, "grad_norm": 0.7935547828674316, "learning_rate": 2.90648275862069e-05, "loss": 0.184, "step": 9289 }, { "epoch": 32.03448275862069, "grad_norm": 0.7006465792655945, "learning_rate": 2.9064367816091954e-05, "loss": 0.2053, "step": 9290 }, { "epoch": 32.03793103448276, "grad_norm": 0.6932304501533508, "learning_rate": 2.9063908045977013e-05, "loss": 0.1953, "step": 9291 }, { "epoch": 32.04137931034483, "grad_norm": 0.936317503452301, "learning_rate": 2.906344827586207e-05, "loss": 0.1907, "step": 9292 }, { "epoch": 32.0448275862069, "grad_norm": 1.3094724416732788, "learning_rate": 2.9062988505747127e-05, "loss": 0.1965, "step": 9293 }, { "epoch": 32.04827586206896, "grad_norm": 1.127447485923767, "learning_rate": 2.9062528735632185e-05, "loss": 0.1949, "step": 9294 }, { "epoch": 32.05172413793103, "grad_norm": 1.6607422828674316, "learning_rate": 2.906206896551724e-05, "loss": 0.2088, "step": 9295 }, { "epoch": 32.0551724137931, "grad_norm": 0.8247566223144531, "learning_rate": 2.9061609195402303e-05, "loss": 0.1731, "step": 9296 }, { "epoch": 32.05862068965517, "grad_norm": 0.6218279600143433, "learning_rate": 2.9061149425287358e-05, "loss": 0.1741, "step": 9297 }, { "epoch": 32.06206896551724, "grad_norm": 0.6085888743400574, "learning_rate": 2.9060689655172413e-05, "loss": 0.1787, "step": 9298 }, { "epoch": 32.06551724137931, "grad_norm": 1.2128791809082031, "learning_rate": 2.9060229885057472e-05, "loss": 0.1651, "step": 9299 }, { "epoch": 32.06896551724138, "grad_norm": 0.626512885093689, "learning_rate": 2.905977011494253e-05, "loss": 0.1794, "step": 9300 }, { "epoch": 32.07241379310345, "grad_norm": 1.008880376815796, "learning_rate": 2.905931034482759e-05, "loss": 0.1643, "step": 9301 }, { "epoch": 32.07586206896552, "grad_norm": 2.9030961990356445, "learning_rate": 2.9058850574712645e-05, "loss": 0.1723, "step": 9302 }, { "epoch": 32.07931034482758, "grad_norm": 1.3467941284179688, "learning_rate": 2.90583908045977e-05, "loss": 0.1753, "step": 9303 }, { "epoch": 32.08275862068965, "grad_norm": 4.577706336975098, "learning_rate": 2.9057931034482762e-05, "loss": 0.1573, "step": 9304 }, { "epoch": 32.08620689655172, "grad_norm": 3.2632317543029785, "learning_rate": 2.9057471264367817e-05, "loss": 0.2516, "step": 9305 }, { "epoch": 32.08965517241379, "grad_norm": 1.0958375930786133, "learning_rate": 2.9057011494252873e-05, "loss": 0.2546, "step": 9306 }, { "epoch": 32.09310344827586, "grad_norm": 0.6618547439575195, "learning_rate": 2.905655172413793e-05, "loss": 0.229, "step": 9307 }, { "epoch": 32.09655172413793, "grad_norm": 0.5942777991294861, "learning_rate": 2.905609195402299e-05, "loss": 0.2035, "step": 9308 }, { "epoch": 32.1, "grad_norm": 1.0319565534591675, "learning_rate": 2.905563218390805e-05, "loss": 0.2221, "step": 9309 }, { "epoch": 32.10344827586207, "grad_norm": 0.8617600202560425, "learning_rate": 2.9055172413793104e-05, "loss": 0.2366, "step": 9310 }, { "epoch": 32.10689655172414, "grad_norm": 0.7476131916046143, "learning_rate": 2.905471264367816e-05, "loss": 0.2089, "step": 9311 }, { "epoch": 32.110344827586204, "grad_norm": 0.7499658465385437, "learning_rate": 2.905425287356322e-05, "loss": 0.2143, "step": 9312 }, { "epoch": 32.11379310344827, "grad_norm": 0.7056556344032288, "learning_rate": 2.9053793103448277e-05, "loss": 0.1999, "step": 9313 }, { "epoch": 32.11724137931034, "grad_norm": 1.055824637413025, "learning_rate": 2.9053333333333335e-05, "loss": 0.2192, "step": 9314 }, { "epoch": 32.12068965517241, "grad_norm": 0.5125923156738281, "learning_rate": 2.905287356321839e-05, "loss": 0.1848, "step": 9315 }, { "epoch": 32.12413793103448, "grad_norm": 0.4783141314983368, "learning_rate": 2.905241379310345e-05, "loss": 0.1876, "step": 9316 }, { "epoch": 32.12758620689655, "grad_norm": 0.8559415340423584, "learning_rate": 2.9051954022988508e-05, "loss": 0.1753, "step": 9317 }, { "epoch": 32.13103448275862, "grad_norm": 1.5815485715866089, "learning_rate": 2.9051494252873563e-05, "loss": 0.1695, "step": 9318 }, { "epoch": 32.13448275862069, "grad_norm": 0.7754355072975159, "learning_rate": 2.9051034482758622e-05, "loss": 0.1923, "step": 9319 }, { "epoch": 32.13793103448276, "grad_norm": 1.6404038667678833, "learning_rate": 2.905057471264368e-05, "loss": 0.1921, "step": 9320 }, { "epoch": 32.141379310344824, "grad_norm": 0.8564251065254211, "learning_rate": 2.9050114942528736e-05, "loss": 0.1955, "step": 9321 }, { "epoch": 32.144827586206894, "grad_norm": 0.7002036571502686, "learning_rate": 2.9049655172413795e-05, "loss": 0.1981, "step": 9322 }, { "epoch": 32.148275862068964, "grad_norm": 0.7759421467781067, "learning_rate": 2.904919540229885e-05, "loss": 0.191, "step": 9323 }, { "epoch": 32.15172413793103, "grad_norm": 0.8782743215560913, "learning_rate": 2.9048735632183912e-05, "loss": 0.1852, "step": 9324 }, { "epoch": 32.1551724137931, "grad_norm": 0.8564130663871765, "learning_rate": 2.9048275862068967e-05, "loss": 0.1594, "step": 9325 }, { "epoch": 32.15862068965517, "grad_norm": 2.9441685676574707, "learning_rate": 2.9047816091954023e-05, "loss": 0.1798, "step": 9326 }, { "epoch": 32.16206896551724, "grad_norm": 0.9159011244773865, "learning_rate": 2.904735632183908e-05, "loss": 0.1533, "step": 9327 }, { "epoch": 32.16551724137931, "grad_norm": 1.0333093404769897, "learning_rate": 2.904689655172414e-05, "loss": 0.1765, "step": 9328 }, { "epoch": 32.16896551724138, "grad_norm": 0.8572070002555847, "learning_rate": 2.90464367816092e-05, "loss": 0.1972, "step": 9329 }, { "epoch": 32.172413793103445, "grad_norm": 1.3214503526687622, "learning_rate": 2.9045977011494254e-05, "loss": 0.2429, "step": 9330 }, { "epoch": 32.175862068965515, "grad_norm": 0.5381466150283813, "learning_rate": 2.904551724137931e-05, "loss": 0.2571, "step": 9331 }, { "epoch": 32.179310344827584, "grad_norm": 0.6298054456710815, "learning_rate": 2.904505747126437e-05, "loss": 0.231, "step": 9332 }, { "epoch": 32.182758620689654, "grad_norm": 0.6027193069458008, "learning_rate": 2.9044597701149427e-05, "loss": 0.2106, "step": 9333 }, { "epoch": 32.186206896551724, "grad_norm": 0.6037879586219788, "learning_rate": 2.9044137931034482e-05, "loss": 0.2071, "step": 9334 }, { "epoch": 32.189655172413794, "grad_norm": 0.7580541372299194, "learning_rate": 2.904367816091954e-05, "loss": 0.2104, "step": 9335 }, { "epoch": 32.19310344827586, "grad_norm": 0.6205865740776062, "learning_rate": 2.90432183908046e-05, "loss": 0.2023, "step": 9336 }, { "epoch": 32.19655172413793, "grad_norm": 0.8159546256065369, "learning_rate": 2.9042758620689658e-05, "loss": 0.1917, "step": 9337 }, { "epoch": 32.2, "grad_norm": 0.551213800907135, "learning_rate": 2.9042298850574713e-05, "loss": 0.1887, "step": 9338 }, { "epoch": 32.203448275862065, "grad_norm": 0.5646641850471497, "learning_rate": 2.904183908045977e-05, "loss": 0.1782, "step": 9339 }, { "epoch": 32.206896551724135, "grad_norm": 0.5622018575668335, "learning_rate": 2.904137931034483e-05, "loss": 0.2048, "step": 9340 }, { "epoch": 32.210344827586205, "grad_norm": 1.2085890769958496, "learning_rate": 2.9040919540229886e-05, "loss": 0.1983, "step": 9341 }, { "epoch": 32.213793103448275, "grad_norm": 0.7723403573036194, "learning_rate": 2.9040459770114944e-05, "loss": 0.2068, "step": 9342 }, { "epoch": 32.217241379310344, "grad_norm": 1.2515873908996582, "learning_rate": 2.904e-05, "loss": 0.1799, "step": 9343 }, { "epoch": 32.220689655172414, "grad_norm": 0.9153546094894409, "learning_rate": 2.903954022988506e-05, "loss": 0.1924, "step": 9344 }, { "epoch": 32.224137931034484, "grad_norm": 1.331316351890564, "learning_rate": 2.9039080459770117e-05, "loss": 0.2024, "step": 9345 }, { "epoch": 32.227586206896554, "grad_norm": 1.4157207012176514, "learning_rate": 2.9038620689655172e-05, "loss": 0.1944, "step": 9346 }, { "epoch": 32.23103448275862, "grad_norm": 0.9131737947463989, "learning_rate": 2.903816091954023e-05, "loss": 0.1612, "step": 9347 }, { "epoch": 32.234482758620686, "grad_norm": 1.388870358467102, "learning_rate": 2.903770114942529e-05, "loss": 0.1837, "step": 9348 }, { "epoch": 32.237931034482756, "grad_norm": 1.2862471342086792, "learning_rate": 2.9037241379310345e-05, "loss": 0.1753, "step": 9349 }, { "epoch": 32.241379310344826, "grad_norm": 1.1260071992874146, "learning_rate": 2.9036781609195404e-05, "loss": 0.1667, "step": 9350 }, { "epoch": 32.244827586206895, "grad_norm": 0.9871659874916077, "learning_rate": 2.903632183908046e-05, "loss": 0.1804, "step": 9351 }, { "epoch": 32.248275862068965, "grad_norm": 1.2042601108551025, "learning_rate": 2.9035862068965518e-05, "loss": 0.1869, "step": 9352 }, { "epoch": 32.251724137931035, "grad_norm": 0.8537313342094421, "learning_rate": 2.9035402298850576e-05, "loss": 0.1673, "step": 9353 }, { "epoch": 32.255172413793105, "grad_norm": 2.1527740955352783, "learning_rate": 2.9034942528735632e-05, "loss": 0.1993, "step": 9354 }, { "epoch": 32.258620689655174, "grad_norm": 1.6597460508346558, "learning_rate": 2.903448275862069e-05, "loss": 0.2631, "step": 9355 }, { "epoch": 32.262068965517244, "grad_norm": 0.9980902671813965, "learning_rate": 2.9034022988505746e-05, "loss": 0.2429, "step": 9356 }, { "epoch": 32.265517241379314, "grad_norm": 1.182389259338379, "learning_rate": 2.9033563218390808e-05, "loss": 0.2078, "step": 9357 }, { "epoch": 32.26896551724138, "grad_norm": 1.0411518812179565, "learning_rate": 2.9033103448275863e-05, "loss": 0.2348, "step": 9358 }, { "epoch": 32.272413793103446, "grad_norm": 0.7669999003410339, "learning_rate": 2.903264367816092e-05, "loss": 0.2241, "step": 9359 }, { "epoch": 32.275862068965516, "grad_norm": 0.8695125579833984, "learning_rate": 2.9032183908045977e-05, "loss": 0.2434, "step": 9360 }, { "epoch": 32.279310344827586, "grad_norm": 0.5956897735595703, "learning_rate": 2.9031724137931036e-05, "loss": 0.2222, "step": 9361 }, { "epoch": 32.282758620689656, "grad_norm": 0.5369762182235718, "learning_rate": 2.903126436781609e-05, "loss": 0.215, "step": 9362 }, { "epoch": 32.286206896551725, "grad_norm": 0.753838837146759, "learning_rate": 2.903080459770115e-05, "loss": 0.2051, "step": 9363 }, { "epoch": 32.289655172413795, "grad_norm": 1.2522414922714233, "learning_rate": 2.9030344827586205e-05, "loss": 0.1982, "step": 9364 }, { "epoch": 32.293103448275865, "grad_norm": 0.7660573720932007, "learning_rate": 2.9029885057471267e-05, "loss": 0.186, "step": 9365 }, { "epoch": 32.296551724137935, "grad_norm": 0.9687782526016235, "learning_rate": 2.9029425287356322e-05, "loss": 0.2281, "step": 9366 }, { "epoch": 32.3, "grad_norm": 0.6053619384765625, "learning_rate": 2.9028965517241378e-05, "loss": 0.1929, "step": 9367 }, { "epoch": 32.30344827586207, "grad_norm": 1.6313188076019287, "learning_rate": 2.9028505747126436e-05, "loss": 0.1889, "step": 9368 }, { "epoch": 32.30689655172414, "grad_norm": 1.70514976978302, "learning_rate": 2.9028045977011495e-05, "loss": 0.1652, "step": 9369 }, { "epoch": 32.310344827586206, "grad_norm": 0.9079357385635376, "learning_rate": 2.9027586206896554e-05, "loss": 0.1994, "step": 9370 }, { "epoch": 32.313793103448276, "grad_norm": 0.949357807636261, "learning_rate": 2.902712643678161e-05, "loss": 0.1766, "step": 9371 }, { "epoch": 32.317241379310346, "grad_norm": 0.7559463381767273, "learning_rate": 2.9026666666666664e-05, "loss": 0.1765, "step": 9372 }, { "epoch": 32.320689655172416, "grad_norm": 0.6851814985275269, "learning_rate": 2.9026206896551726e-05, "loss": 0.1749, "step": 9373 }, { "epoch": 32.324137931034485, "grad_norm": 0.7651326060295105, "learning_rate": 2.902574712643678e-05, "loss": 0.178, "step": 9374 }, { "epoch": 32.327586206896555, "grad_norm": 0.7497700452804565, "learning_rate": 2.902528735632184e-05, "loss": 0.1656, "step": 9375 }, { "epoch": 32.33103448275862, "grad_norm": 1.0203782320022583, "learning_rate": 2.9024827586206896e-05, "loss": 0.1842, "step": 9376 }, { "epoch": 32.33448275862069, "grad_norm": 0.7930234670639038, "learning_rate": 2.9024367816091954e-05, "loss": 0.1671, "step": 9377 }, { "epoch": 32.33793103448276, "grad_norm": 0.7099402546882629, "learning_rate": 2.9023908045977013e-05, "loss": 0.1645, "step": 9378 }, { "epoch": 32.34137931034483, "grad_norm": 1.0862784385681152, "learning_rate": 2.902344827586207e-05, "loss": 0.1659, "step": 9379 }, { "epoch": 32.3448275862069, "grad_norm": 1.6542696952819824, "learning_rate": 2.9022988505747127e-05, "loss": 0.2345, "step": 9380 }, { "epoch": 32.34827586206897, "grad_norm": 0.877650260925293, "learning_rate": 2.9022528735632186e-05, "loss": 0.2884, "step": 9381 }, { "epoch": 32.351724137931036, "grad_norm": 0.9248872995376587, "learning_rate": 2.902206896551724e-05, "loss": 0.2322, "step": 9382 }, { "epoch": 32.355172413793106, "grad_norm": 0.536134660243988, "learning_rate": 2.90216091954023e-05, "loss": 0.2443, "step": 9383 }, { "epoch": 32.358620689655176, "grad_norm": 0.502812385559082, "learning_rate": 2.9021149425287355e-05, "loss": 0.2077, "step": 9384 }, { "epoch": 32.36206896551724, "grad_norm": 0.4535665810108185, "learning_rate": 2.9020689655172417e-05, "loss": 0.1829, "step": 9385 }, { "epoch": 32.36551724137931, "grad_norm": 1.1164729595184326, "learning_rate": 2.9020229885057472e-05, "loss": 0.1978, "step": 9386 }, { "epoch": 32.36896551724138, "grad_norm": 1.0373355150222778, "learning_rate": 2.9019770114942528e-05, "loss": 0.2081, "step": 9387 }, { "epoch": 32.37241379310345, "grad_norm": 0.5720630884170532, "learning_rate": 2.9019310344827586e-05, "loss": 0.2155, "step": 9388 }, { "epoch": 32.37586206896552, "grad_norm": 0.8583043813705444, "learning_rate": 2.9018850574712645e-05, "loss": 0.2097, "step": 9389 }, { "epoch": 32.37931034482759, "grad_norm": 0.6809021234512329, "learning_rate": 2.9018390804597704e-05, "loss": 0.1933, "step": 9390 }, { "epoch": 32.38275862068966, "grad_norm": 0.5480522513389587, "learning_rate": 2.901793103448276e-05, "loss": 0.1827, "step": 9391 }, { "epoch": 32.38620689655173, "grad_norm": 0.5833609700202942, "learning_rate": 2.9017471264367814e-05, "loss": 0.1816, "step": 9392 }, { "epoch": 32.389655172413796, "grad_norm": 0.7202111482620239, "learning_rate": 2.9017011494252876e-05, "loss": 0.176, "step": 9393 }, { "epoch": 32.39310344827586, "grad_norm": 0.6256267428398132, "learning_rate": 2.901655172413793e-05, "loss": 0.1798, "step": 9394 }, { "epoch": 32.39655172413793, "grad_norm": 0.7197169661521912, "learning_rate": 2.9016091954022987e-05, "loss": 0.1922, "step": 9395 }, { "epoch": 32.4, "grad_norm": 1.3044102191925049, "learning_rate": 2.9015632183908046e-05, "loss": 0.1917, "step": 9396 }, { "epoch": 32.40344827586207, "grad_norm": 0.6761364340782166, "learning_rate": 2.9015172413793104e-05, "loss": 0.1788, "step": 9397 }, { "epoch": 32.40689655172414, "grad_norm": 0.9494683146476746, "learning_rate": 2.9014712643678163e-05, "loss": 0.1665, "step": 9398 }, { "epoch": 32.41034482758621, "grad_norm": 0.8946865200996399, "learning_rate": 2.9014252873563218e-05, "loss": 0.2315, "step": 9399 }, { "epoch": 32.41379310344828, "grad_norm": 0.817347526550293, "learning_rate": 2.9013793103448274e-05, "loss": 0.1929, "step": 9400 }, { "epoch": 32.41724137931035, "grad_norm": 0.8284268975257874, "learning_rate": 2.9013333333333336e-05, "loss": 0.1799, "step": 9401 }, { "epoch": 32.42068965517242, "grad_norm": 1.0746431350708008, "learning_rate": 2.901287356321839e-05, "loss": 0.172, "step": 9402 }, { "epoch": 32.42413793103448, "grad_norm": 1.1316617727279663, "learning_rate": 2.901241379310345e-05, "loss": 0.1762, "step": 9403 }, { "epoch": 32.42758620689655, "grad_norm": 1.0676836967468262, "learning_rate": 2.9011954022988505e-05, "loss": 0.2033, "step": 9404 }, { "epoch": 32.43103448275862, "grad_norm": 0.8445324897766113, "learning_rate": 2.9011494252873564e-05, "loss": 0.2094, "step": 9405 }, { "epoch": 32.43448275862069, "grad_norm": 0.5919508337974548, "learning_rate": 2.9011034482758622e-05, "loss": 0.2658, "step": 9406 }, { "epoch": 32.43793103448276, "grad_norm": 0.7345144152641296, "learning_rate": 2.9010574712643678e-05, "loss": 0.2197, "step": 9407 }, { "epoch": 32.44137931034483, "grad_norm": 0.6499236226081848, "learning_rate": 2.9010114942528736e-05, "loss": 0.2294, "step": 9408 }, { "epoch": 32.4448275862069, "grad_norm": 0.6350103616714478, "learning_rate": 2.9009655172413795e-05, "loss": 0.2426, "step": 9409 }, { "epoch": 32.44827586206897, "grad_norm": 0.7638291716575623, "learning_rate": 2.900919540229885e-05, "loss": 0.2046, "step": 9410 }, { "epoch": 32.45172413793104, "grad_norm": 0.5633493661880493, "learning_rate": 2.900873563218391e-05, "loss": 0.198, "step": 9411 }, { "epoch": 32.4551724137931, "grad_norm": 1.6715658903121948, "learning_rate": 2.9008275862068964e-05, "loss": 0.2026, "step": 9412 }, { "epoch": 32.45862068965517, "grad_norm": 0.562752902507782, "learning_rate": 2.9007816091954026e-05, "loss": 0.2101, "step": 9413 }, { "epoch": 32.46206896551724, "grad_norm": 0.7466509938240051, "learning_rate": 2.900735632183908e-05, "loss": 0.2069, "step": 9414 }, { "epoch": 32.46551724137931, "grad_norm": 0.5234197974205017, "learning_rate": 2.9006896551724137e-05, "loss": 0.2044, "step": 9415 }, { "epoch": 32.46896551724138, "grad_norm": 0.7364910840988159, "learning_rate": 2.9006436781609196e-05, "loss": 0.189, "step": 9416 }, { "epoch": 32.47241379310345, "grad_norm": 0.7125122547149658, "learning_rate": 2.9005977011494254e-05, "loss": 0.2227, "step": 9417 }, { "epoch": 32.47586206896552, "grad_norm": 0.7143125534057617, "learning_rate": 2.9005517241379313e-05, "loss": 0.2007, "step": 9418 }, { "epoch": 32.47931034482759, "grad_norm": 0.649418294429779, "learning_rate": 2.9005057471264368e-05, "loss": 0.2099, "step": 9419 }, { "epoch": 32.48275862068966, "grad_norm": 1.3305355310440063, "learning_rate": 2.9004597701149424e-05, "loss": 0.2048, "step": 9420 }, { "epoch": 32.48620689655172, "grad_norm": 0.8276295065879822, "learning_rate": 2.9004137931034486e-05, "loss": 0.1789, "step": 9421 }, { "epoch": 32.48965517241379, "grad_norm": 0.9554473757743835, "learning_rate": 2.900367816091954e-05, "loss": 0.197, "step": 9422 }, { "epoch": 32.49310344827586, "grad_norm": 4.0845723152160645, "learning_rate": 2.9003218390804596e-05, "loss": 0.1729, "step": 9423 }, { "epoch": 32.49655172413793, "grad_norm": 0.560257613658905, "learning_rate": 2.9002758620689655e-05, "loss": 0.1943, "step": 9424 }, { "epoch": 32.5, "grad_norm": 0.9479022026062012, "learning_rate": 2.9002298850574714e-05, "loss": 0.1601, "step": 9425 }, { "epoch": 32.50344827586207, "grad_norm": 1.5041331052780151, "learning_rate": 2.9001839080459772e-05, "loss": 0.167, "step": 9426 }, { "epoch": 32.50689655172414, "grad_norm": 1.1062895059585571, "learning_rate": 2.9001379310344828e-05, "loss": 0.166, "step": 9427 }, { "epoch": 32.51034482758621, "grad_norm": 1.7988169193267822, "learning_rate": 2.9000919540229883e-05, "loss": 0.1967, "step": 9428 }, { "epoch": 32.51379310344828, "grad_norm": 0.8637951612472534, "learning_rate": 2.9000459770114945e-05, "loss": 0.2, "step": 9429 }, { "epoch": 32.51724137931034, "grad_norm": 1.2009773254394531, "learning_rate": 2.9e-05, "loss": 0.2535, "step": 9430 }, { "epoch": 32.52068965517241, "grad_norm": 0.9069499373435974, "learning_rate": 2.899954022988506e-05, "loss": 0.2677, "step": 9431 }, { "epoch": 32.52413793103448, "grad_norm": 0.5994694828987122, "learning_rate": 2.8999080459770114e-05, "loss": 0.2114, "step": 9432 }, { "epoch": 32.52758620689655, "grad_norm": 0.7608529925346375, "learning_rate": 2.8998620689655173e-05, "loss": 0.2064, "step": 9433 }, { "epoch": 32.53103448275862, "grad_norm": 1.5952856540679932, "learning_rate": 2.899816091954023e-05, "loss": 0.2484, "step": 9434 }, { "epoch": 32.53448275862069, "grad_norm": 0.5877231955528259, "learning_rate": 2.8997701149425287e-05, "loss": 0.2041, "step": 9435 }, { "epoch": 32.53793103448276, "grad_norm": 1.8378756046295166, "learning_rate": 2.8997241379310345e-05, "loss": 0.2026, "step": 9436 }, { "epoch": 32.54137931034483, "grad_norm": 0.9834261536598206, "learning_rate": 2.8996781609195404e-05, "loss": 0.2024, "step": 9437 }, { "epoch": 32.5448275862069, "grad_norm": 0.5531467199325562, "learning_rate": 2.899632183908046e-05, "loss": 0.2087, "step": 9438 }, { "epoch": 32.54827586206896, "grad_norm": 0.6372510194778442, "learning_rate": 2.8995862068965518e-05, "loss": 0.1908, "step": 9439 }, { "epoch": 32.55172413793103, "grad_norm": 0.5459098219871521, "learning_rate": 2.8995402298850573e-05, "loss": 0.1747, "step": 9440 }, { "epoch": 32.5551724137931, "grad_norm": 0.5371068716049194, "learning_rate": 2.8994942528735636e-05, "loss": 0.1961, "step": 9441 }, { "epoch": 32.55862068965517, "grad_norm": 0.8060145378112793, "learning_rate": 2.899448275862069e-05, "loss": 0.1984, "step": 9442 }, { "epoch": 32.56206896551724, "grad_norm": 0.8164899349212646, "learning_rate": 2.8994022988505746e-05, "loss": 0.1903, "step": 9443 }, { "epoch": 32.56551724137931, "grad_norm": 0.623616635799408, "learning_rate": 2.8993563218390805e-05, "loss": 0.2056, "step": 9444 }, { "epoch": 32.56896551724138, "grad_norm": 0.7804853916168213, "learning_rate": 2.8993103448275863e-05, "loss": 0.1947, "step": 9445 }, { "epoch": 32.57241379310345, "grad_norm": 1.2832609415054321, "learning_rate": 2.8992643678160922e-05, "loss": 0.1783, "step": 9446 }, { "epoch": 32.57586206896552, "grad_norm": 1.0102812051773071, "learning_rate": 2.8992183908045977e-05, "loss": 0.1951, "step": 9447 }, { "epoch": 32.57931034482758, "grad_norm": 0.792366087436676, "learning_rate": 2.8991724137931033e-05, "loss": 0.1926, "step": 9448 }, { "epoch": 32.58275862068965, "grad_norm": 0.7945685982704163, "learning_rate": 2.8991264367816095e-05, "loss": 0.1737, "step": 9449 }, { "epoch": 32.58620689655172, "grad_norm": 1.5853503942489624, "learning_rate": 2.899080459770115e-05, "loss": 0.1816, "step": 9450 }, { "epoch": 32.58965517241379, "grad_norm": 0.9002912640571594, "learning_rate": 2.8990344827586205e-05, "loss": 0.1656, "step": 9451 }, { "epoch": 32.59310344827586, "grad_norm": 3.0754449367523193, "learning_rate": 2.8989885057471264e-05, "loss": 0.1908, "step": 9452 }, { "epoch": 32.59655172413793, "grad_norm": 1.2035315036773682, "learning_rate": 2.8989425287356323e-05, "loss": 0.1805, "step": 9453 }, { "epoch": 32.6, "grad_norm": 1.723464846611023, "learning_rate": 2.898896551724138e-05, "loss": 0.1996, "step": 9454 }, { "epoch": 32.60344827586207, "grad_norm": 5.169252872467041, "learning_rate": 2.8988505747126437e-05, "loss": 0.2691, "step": 9455 }, { "epoch": 32.60689655172414, "grad_norm": 1.2627429962158203, "learning_rate": 2.8988045977011492e-05, "loss": 0.2638, "step": 9456 }, { "epoch": 32.610344827586204, "grad_norm": 0.7497335076332092, "learning_rate": 2.8987586206896554e-05, "loss": 0.2246, "step": 9457 }, { "epoch": 32.61379310344827, "grad_norm": 0.8262646198272705, "learning_rate": 2.898712643678161e-05, "loss": 0.2146, "step": 9458 }, { "epoch": 32.61724137931034, "grad_norm": 0.5863873958587646, "learning_rate": 2.8986666666666668e-05, "loss": 0.2385, "step": 9459 }, { "epoch": 32.62068965517241, "grad_norm": 0.6652276515960693, "learning_rate": 2.8986206896551723e-05, "loss": 0.2146, "step": 9460 }, { "epoch": 32.62413793103448, "grad_norm": 0.8378130197525024, "learning_rate": 2.8985747126436782e-05, "loss": 0.2365, "step": 9461 }, { "epoch": 32.62758620689655, "grad_norm": 0.6695242524147034, "learning_rate": 2.898528735632184e-05, "loss": 0.2124, "step": 9462 }, { "epoch": 32.63103448275862, "grad_norm": 0.7903420329093933, "learning_rate": 2.8984827586206896e-05, "loss": 0.2254, "step": 9463 }, { "epoch": 32.63448275862069, "grad_norm": 0.6357313990592957, "learning_rate": 2.8984367816091955e-05, "loss": 0.1896, "step": 9464 }, { "epoch": 32.63793103448276, "grad_norm": 1.3026487827301025, "learning_rate": 2.8983908045977013e-05, "loss": 0.2001, "step": 9465 }, { "epoch": 32.641379310344824, "grad_norm": 0.5914021134376526, "learning_rate": 2.898344827586207e-05, "loss": 0.1976, "step": 9466 }, { "epoch": 32.644827586206894, "grad_norm": 1.0661534070968628, "learning_rate": 2.8982988505747127e-05, "loss": 0.2202, "step": 9467 }, { "epoch": 32.648275862068964, "grad_norm": 1.7499066591262817, "learning_rate": 2.8982528735632183e-05, "loss": 0.1752, "step": 9468 }, { "epoch": 32.65172413793103, "grad_norm": 0.9589762687683105, "learning_rate": 2.8982068965517245e-05, "loss": 0.1964, "step": 9469 }, { "epoch": 32.6551724137931, "grad_norm": 0.7482517957687378, "learning_rate": 2.89816091954023e-05, "loss": 0.1735, "step": 9470 }, { "epoch": 32.65862068965517, "grad_norm": 1.3955963850021362, "learning_rate": 2.8981149425287355e-05, "loss": 0.1688, "step": 9471 }, { "epoch": 32.66206896551724, "grad_norm": 1.0386537313461304, "learning_rate": 2.8980689655172414e-05, "loss": 0.1642, "step": 9472 }, { "epoch": 32.66551724137931, "grad_norm": 0.76963210105896, "learning_rate": 2.8980229885057473e-05, "loss": 0.1684, "step": 9473 }, { "epoch": 32.66896551724138, "grad_norm": 0.6056795120239258, "learning_rate": 2.897977011494253e-05, "loss": 0.1988, "step": 9474 }, { "epoch": 32.672413793103445, "grad_norm": 3.1522367000579834, "learning_rate": 2.8979310344827587e-05, "loss": 0.1699, "step": 9475 }, { "epoch": 32.675862068965515, "grad_norm": 1.8094652891159058, "learning_rate": 2.8978850574712642e-05, "loss": 0.1694, "step": 9476 }, { "epoch": 32.679310344827584, "grad_norm": 1.7877135276794434, "learning_rate": 2.8978390804597704e-05, "loss": 0.1842, "step": 9477 }, { "epoch": 32.682758620689654, "grad_norm": 1.1798828840255737, "learning_rate": 2.897793103448276e-05, "loss": 0.1915, "step": 9478 }, { "epoch": 32.686206896551724, "grad_norm": 0.9954628944396973, "learning_rate": 2.8977471264367818e-05, "loss": 0.197, "step": 9479 }, { "epoch": 32.689655172413794, "grad_norm": 1.4995770454406738, "learning_rate": 2.8977011494252873e-05, "loss": 0.2692, "step": 9480 }, { "epoch": 32.69310344827586, "grad_norm": 0.7894964218139648, "learning_rate": 2.8976551724137932e-05, "loss": 0.2734, "step": 9481 }, { "epoch": 32.69655172413793, "grad_norm": 0.7171152234077454, "learning_rate": 2.897609195402299e-05, "loss": 0.2489, "step": 9482 }, { "epoch": 32.7, "grad_norm": 1.267428994178772, "learning_rate": 2.8975632183908046e-05, "loss": 0.2394, "step": 9483 }, { "epoch": 32.703448275862065, "grad_norm": 0.624915599822998, "learning_rate": 2.89751724137931e-05, "loss": 0.2029, "step": 9484 }, { "epoch": 32.706896551724135, "grad_norm": 0.7769904136657715, "learning_rate": 2.8974712643678163e-05, "loss": 0.2307, "step": 9485 }, { "epoch": 32.710344827586205, "grad_norm": 0.608521580696106, "learning_rate": 2.897425287356322e-05, "loss": 0.2206, "step": 9486 }, { "epoch": 32.713793103448275, "grad_norm": 0.9478812217712402, "learning_rate": 2.8973793103448277e-05, "loss": 0.2071, "step": 9487 }, { "epoch": 32.717241379310344, "grad_norm": 0.8165191411972046, "learning_rate": 2.8973333333333333e-05, "loss": 0.2356, "step": 9488 }, { "epoch": 32.720689655172414, "grad_norm": 0.5164327621459961, "learning_rate": 2.897287356321839e-05, "loss": 0.1829, "step": 9489 }, { "epoch": 32.724137931034484, "grad_norm": 0.7211045026779175, "learning_rate": 2.897241379310345e-05, "loss": 0.2182, "step": 9490 }, { "epoch": 32.727586206896554, "grad_norm": 0.6153923273086548, "learning_rate": 2.8971954022988505e-05, "loss": 0.1878, "step": 9491 }, { "epoch": 32.73103448275862, "grad_norm": 0.5507876873016357, "learning_rate": 2.8971494252873564e-05, "loss": 0.1913, "step": 9492 }, { "epoch": 32.734482758620686, "grad_norm": 1.1601125001907349, "learning_rate": 2.8971034482758623e-05, "loss": 0.2266, "step": 9493 }, { "epoch": 32.737931034482756, "grad_norm": 0.8615549206733704, "learning_rate": 2.8970574712643678e-05, "loss": 0.2227, "step": 9494 }, { "epoch": 32.741379310344826, "grad_norm": 1.5257315635681152, "learning_rate": 2.8970114942528737e-05, "loss": 0.1887, "step": 9495 }, { "epoch": 32.744827586206895, "grad_norm": 0.8748583197593689, "learning_rate": 2.8969655172413792e-05, "loss": 0.1778, "step": 9496 }, { "epoch": 32.748275862068965, "grad_norm": 0.6897594928741455, "learning_rate": 2.8969195402298854e-05, "loss": 0.1827, "step": 9497 }, { "epoch": 32.751724137931035, "grad_norm": 0.5734062790870667, "learning_rate": 2.896873563218391e-05, "loss": 0.1785, "step": 9498 }, { "epoch": 32.755172413793105, "grad_norm": 1.0156610012054443, "learning_rate": 2.8968275862068965e-05, "loss": 0.1967, "step": 9499 }, { "epoch": 32.758620689655174, "grad_norm": 1.4094021320343018, "learning_rate": 2.8967816091954023e-05, "loss": 0.1896, "step": 9500 }, { "epoch": 32.762068965517244, "grad_norm": 0.877655029296875, "learning_rate": 2.8967356321839082e-05, "loss": 0.2044, "step": 9501 }, { "epoch": 32.765517241379314, "grad_norm": 0.9001066088676453, "learning_rate": 2.896689655172414e-05, "loss": 0.1768, "step": 9502 }, { "epoch": 32.76896551724138, "grad_norm": 1.6215566396713257, "learning_rate": 2.8966436781609196e-05, "loss": 0.1927, "step": 9503 }, { "epoch": 32.772413793103446, "grad_norm": 1.20802640914917, "learning_rate": 2.896597701149425e-05, "loss": 0.2126, "step": 9504 }, { "epoch": 32.775862068965516, "grad_norm": 1.728591799736023, "learning_rate": 2.8965517241379313e-05, "loss": 0.2351, "step": 9505 }, { "epoch": 32.779310344827586, "grad_norm": 1.0931291580200195, "learning_rate": 2.896505747126437e-05, "loss": 0.2689, "step": 9506 }, { "epoch": 32.782758620689656, "grad_norm": 0.653667151927948, "learning_rate": 2.8964597701149427e-05, "loss": 0.2247, "step": 9507 }, { "epoch": 32.786206896551725, "grad_norm": 1.136795997619629, "learning_rate": 2.8964137931034483e-05, "loss": 0.2335, "step": 9508 }, { "epoch": 32.789655172413795, "grad_norm": 0.8149865865707397, "learning_rate": 2.896367816091954e-05, "loss": 0.2162, "step": 9509 }, { "epoch": 32.793103448275865, "grad_norm": 0.6141611933708191, "learning_rate": 2.89632183908046e-05, "loss": 0.2261, "step": 9510 }, { "epoch": 32.796551724137935, "grad_norm": 3.9719791412353516, "learning_rate": 2.8962758620689655e-05, "loss": 0.2302, "step": 9511 }, { "epoch": 32.8, "grad_norm": 1.1686209440231323, "learning_rate": 2.896229885057471e-05, "loss": 0.2163, "step": 9512 }, { "epoch": 32.80344827586207, "grad_norm": 1.6076295375823975, "learning_rate": 2.8961839080459773e-05, "loss": 0.2182, "step": 9513 }, { "epoch": 32.80689655172414, "grad_norm": 0.8838937282562256, "learning_rate": 2.8961379310344828e-05, "loss": 0.2232, "step": 9514 }, { "epoch": 32.810344827586206, "grad_norm": 0.8460537791252136, "learning_rate": 2.8960919540229887e-05, "loss": 0.227, "step": 9515 }, { "epoch": 32.813793103448276, "grad_norm": 0.6369902491569519, "learning_rate": 2.8960459770114942e-05, "loss": 0.2038, "step": 9516 }, { "epoch": 32.817241379310346, "grad_norm": 0.7650838494300842, "learning_rate": 2.896e-05, "loss": 0.2002, "step": 9517 }, { "epoch": 32.820689655172416, "grad_norm": 1.1495555639266968, "learning_rate": 2.895954022988506e-05, "loss": 0.2112, "step": 9518 }, { "epoch": 32.824137931034485, "grad_norm": 0.7184934616088867, "learning_rate": 2.8959080459770115e-05, "loss": 0.1978, "step": 9519 }, { "epoch": 32.827586206896555, "grad_norm": 1.0528403520584106, "learning_rate": 2.8958620689655173e-05, "loss": 0.2078, "step": 9520 }, { "epoch": 32.83103448275862, "grad_norm": 0.7273347973823547, "learning_rate": 2.8958160919540232e-05, "loss": 0.1845, "step": 9521 }, { "epoch": 32.83448275862069, "grad_norm": 0.637885570526123, "learning_rate": 2.8957701149425287e-05, "loss": 0.1975, "step": 9522 }, { "epoch": 32.83793103448276, "grad_norm": 0.8730015754699707, "learning_rate": 2.8957241379310346e-05, "loss": 0.199, "step": 9523 }, { "epoch": 32.84137931034483, "grad_norm": 0.7798984050750732, "learning_rate": 2.89567816091954e-05, "loss": 0.179, "step": 9524 }, { "epoch": 32.8448275862069, "grad_norm": 1.088426113128662, "learning_rate": 2.8956321839080463e-05, "loss": 0.1841, "step": 9525 }, { "epoch": 32.84827586206897, "grad_norm": 2.523158311843872, "learning_rate": 2.895586206896552e-05, "loss": 0.1957, "step": 9526 }, { "epoch": 32.851724137931036, "grad_norm": 0.7921714186668396, "learning_rate": 2.8955402298850574e-05, "loss": 0.1707, "step": 9527 }, { "epoch": 32.855172413793106, "grad_norm": 1.3467600345611572, "learning_rate": 2.8954942528735633e-05, "loss": 0.2002, "step": 9528 }, { "epoch": 32.858620689655176, "grad_norm": 1.2583808898925781, "learning_rate": 2.895448275862069e-05, "loss": 0.1888, "step": 9529 }, { "epoch": 32.86206896551724, "grad_norm": 1.4048094749450684, "learning_rate": 2.895402298850575e-05, "loss": 0.2207, "step": 9530 }, { "epoch": 32.86551724137931, "grad_norm": 0.741363525390625, "learning_rate": 2.8953563218390805e-05, "loss": 0.2918, "step": 9531 }, { "epoch": 32.86896551724138, "grad_norm": 0.7162971496582031, "learning_rate": 2.895310344827586e-05, "loss": 0.2242, "step": 9532 }, { "epoch": 32.87241379310345, "grad_norm": 0.6412495970726013, "learning_rate": 2.8952643678160923e-05, "loss": 0.2553, "step": 9533 }, { "epoch": 32.87586206896552, "grad_norm": 0.6063274145126343, "learning_rate": 2.8952183908045978e-05, "loss": 0.2113, "step": 9534 }, { "epoch": 32.87931034482759, "grad_norm": 0.690453052520752, "learning_rate": 2.8951724137931037e-05, "loss": 0.207, "step": 9535 }, { "epoch": 32.88275862068966, "grad_norm": 0.6673157215118408, "learning_rate": 2.8951264367816092e-05, "loss": 0.2113, "step": 9536 }, { "epoch": 32.88620689655173, "grad_norm": 0.7401515245437622, "learning_rate": 2.895080459770115e-05, "loss": 0.2201, "step": 9537 }, { "epoch": 32.889655172413796, "grad_norm": 0.7432984113693237, "learning_rate": 2.895034482758621e-05, "loss": 0.2205, "step": 9538 }, { "epoch": 32.89310344827586, "grad_norm": 0.4818425476551056, "learning_rate": 2.8949885057471264e-05, "loss": 0.1951, "step": 9539 }, { "epoch": 32.89655172413793, "grad_norm": 1.155902624130249, "learning_rate": 2.894942528735632e-05, "loss": 0.2002, "step": 9540 }, { "epoch": 32.9, "grad_norm": 0.6277615427970886, "learning_rate": 2.8948965517241382e-05, "loss": 0.2025, "step": 9541 }, { "epoch": 32.90344827586207, "grad_norm": 0.7743133902549744, "learning_rate": 2.8948505747126437e-05, "loss": 0.1904, "step": 9542 }, { "epoch": 32.90689655172414, "grad_norm": 0.9111828804016113, "learning_rate": 2.8948045977011496e-05, "loss": 0.1898, "step": 9543 }, { "epoch": 32.91034482758621, "grad_norm": 0.83240807056427, "learning_rate": 2.894758620689655e-05, "loss": 0.1864, "step": 9544 }, { "epoch": 32.91379310344828, "grad_norm": 1.701643705368042, "learning_rate": 2.894712643678161e-05, "loss": 0.1786, "step": 9545 }, { "epoch": 32.91724137931035, "grad_norm": 0.6378130912780762, "learning_rate": 2.894666666666667e-05, "loss": 0.1827, "step": 9546 }, { "epoch": 32.92068965517242, "grad_norm": 0.891890287399292, "learning_rate": 2.8946206896551724e-05, "loss": 0.1893, "step": 9547 }, { "epoch": 32.92413793103448, "grad_norm": 0.7035164833068848, "learning_rate": 2.8945747126436782e-05, "loss": 0.1813, "step": 9548 }, { "epoch": 32.92758620689655, "grad_norm": 1.522223949432373, "learning_rate": 2.894528735632184e-05, "loss": 0.1807, "step": 9549 }, { "epoch": 32.93103448275862, "grad_norm": 0.9132722616195679, "learning_rate": 2.8944827586206896e-05, "loss": 0.2088, "step": 9550 }, { "epoch": 32.93448275862069, "grad_norm": 0.6483951210975647, "learning_rate": 2.8944367816091955e-05, "loss": 0.1705, "step": 9551 }, { "epoch": 32.93793103448276, "grad_norm": 2.2289865016937256, "learning_rate": 2.894390804597701e-05, "loss": 0.178, "step": 9552 }, { "epoch": 32.94137931034483, "grad_norm": 1.3460434675216675, "learning_rate": 2.8943448275862072e-05, "loss": 0.1779, "step": 9553 }, { "epoch": 32.9448275862069, "grad_norm": 1.4497132301330566, "learning_rate": 2.8942988505747128e-05, "loss": 0.2023, "step": 9554 }, { "epoch": 32.94827586206897, "grad_norm": 1.658878207206726, "learning_rate": 2.8942528735632183e-05, "loss": 0.2764, "step": 9555 }, { "epoch": 32.95172413793104, "grad_norm": 1.4840457439422607, "learning_rate": 2.8942068965517242e-05, "loss": 0.2767, "step": 9556 }, { "epoch": 32.9551724137931, "grad_norm": 0.6650686860084534, "learning_rate": 2.89416091954023e-05, "loss": 0.2216, "step": 9557 }, { "epoch": 32.95862068965517, "grad_norm": 0.5478830337524414, "learning_rate": 2.894114942528736e-05, "loss": 0.2191, "step": 9558 }, { "epoch": 32.96206896551724, "grad_norm": 0.5430249571800232, "learning_rate": 2.8940689655172414e-05, "loss": 0.2212, "step": 9559 }, { "epoch": 32.96551724137931, "grad_norm": 0.6922717690467834, "learning_rate": 2.894022988505747e-05, "loss": 0.1973, "step": 9560 }, { "epoch": 32.96896551724138, "grad_norm": 1.9069888591766357, "learning_rate": 2.8939770114942532e-05, "loss": 0.1865, "step": 9561 }, { "epoch": 32.97241379310345, "grad_norm": 1.187598705291748, "learning_rate": 2.8939310344827587e-05, "loss": 0.2116, "step": 9562 }, { "epoch": 32.97586206896552, "grad_norm": 1.9681123495101929, "learning_rate": 2.8938850574712646e-05, "loss": 0.1921, "step": 9563 }, { "epoch": 32.97931034482759, "grad_norm": 0.8672161102294922, "learning_rate": 2.89383908045977e-05, "loss": 0.2112, "step": 9564 }, { "epoch": 32.98275862068966, "grad_norm": 1.1190611124038696, "learning_rate": 2.893793103448276e-05, "loss": 0.219, "step": 9565 }, { "epoch": 32.98620689655172, "grad_norm": 0.9699251651763916, "learning_rate": 2.893747126436782e-05, "loss": 0.1808, "step": 9566 }, { "epoch": 32.98965517241379, "grad_norm": 1.6472867727279663, "learning_rate": 2.8937011494252874e-05, "loss": 0.2119, "step": 9567 }, { "epoch": 32.99310344827586, "grad_norm": 0.7951877117156982, "learning_rate": 2.8936551724137932e-05, "loss": 0.1905, "step": 9568 }, { "epoch": 32.99655172413793, "grad_norm": 1.4893581867218018, "learning_rate": 2.893609195402299e-05, "loss": 0.1881, "step": 9569 }, { "epoch": 33.0, "grad_norm": 1.314724326133728, "learning_rate": 2.8935632183908046e-05, "loss": 0.2737, "step": 9570 }, { "epoch": 33.00344827586207, "grad_norm": 0.47111329436302185, "learning_rate": 2.8935172413793105e-05, "loss": 0.2223, "step": 9571 }, { "epoch": 33.00689655172414, "grad_norm": 0.6784752011299133, "learning_rate": 2.893471264367816e-05, "loss": 0.2277, "step": 9572 }, { "epoch": 33.01034482758621, "grad_norm": 0.7716693878173828, "learning_rate": 2.893425287356322e-05, "loss": 0.1984, "step": 9573 }, { "epoch": 33.01379310344828, "grad_norm": 1.0803477764129639, "learning_rate": 2.8933793103448278e-05, "loss": 0.2353, "step": 9574 }, { "epoch": 33.01724137931034, "grad_norm": 0.49280187487602234, "learning_rate": 2.8933333333333333e-05, "loss": 0.211, "step": 9575 }, { "epoch": 33.02068965517241, "grad_norm": 1.2159006595611572, "learning_rate": 2.8932873563218392e-05, "loss": 0.203, "step": 9576 }, { "epoch": 33.02413793103448, "grad_norm": 0.7683629989624023, "learning_rate": 2.893241379310345e-05, "loss": 0.2209, "step": 9577 }, { "epoch": 33.02758620689655, "grad_norm": 0.7219715714454651, "learning_rate": 2.8931954022988506e-05, "loss": 0.2145, "step": 9578 }, { "epoch": 33.03103448275862, "grad_norm": 1.7682223320007324, "learning_rate": 2.8931494252873564e-05, "loss": 0.21, "step": 9579 }, { "epoch": 33.03448275862069, "grad_norm": 0.4927560091018677, "learning_rate": 2.893103448275862e-05, "loss": 0.1942, "step": 9580 }, { "epoch": 33.03793103448276, "grad_norm": 0.6255135536193848, "learning_rate": 2.8930574712643682e-05, "loss": 0.1857, "step": 9581 }, { "epoch": 33.04137931034483, "grad_norm": 0.9502967000007629, "learning_rate": 2.8930114942528737e-05, "loss": 0.2042, "step": 9582 }, { "epoch": 33.0448275862069, "grad_norm": 0.9068130254745483, "learning_rate": 2.8929655172413792e-05, "loss": 0.1891, "step": 9583 }, { "epoch": 33.04827586206896, "grad_norm": 1.388093113899231, "learning_rate": 2.892919540229885e-05, "loss": 0.1964, "step": 9584 }, { "epoch": 33.05172413793103, "grad_norm": 0.7690166234970093, "learning_rate": 2.892873563218391e-05, "loss": 0.1759, "step": 9585 }, { "epoch": 33.0551724137931, "grad_norm": 0.9070937633514404, "learning_rate": 2.892827586206897e-05, "loss": 0.1932, "step": 9586 }, { "epoch": 33.05862068965517, "grad_norm": 0.8489810824394226, "learning_rate": 2.8927816091954024e-05, "loss": 0.1644, "step": 9587 }, { "epoch": 33.06206896551724, "grad_norm": 0.8698198199272156, "learning_rate": 2.892735632183908e-05, "loss": 0.1886, "step": 9588 }, { "epoch": 33.06551724137931, "grad_norm": 0.8486262559890747, "learning_rate": 2.892689655172414e-05, "loss": 0.1629, "step": 9589 }, { "epoch": 33.06896551724138, "grad_norm": 1.5708526372909546, "learning_rate": 2.8926436781609196e-05, "loss": 0.1821, "step": 9590 }, { "epoch": 33.07241379310345, "grad_norm": 0.7279483675956726, "learning_rate": 2.8925977011494255e-05, "loss": 0.1699, "step": 9591 }, { "epoch": 33.07586206896552, "grad_norm": 1.150964617729187, "learning_rate": 2.892551724137931e-05, "loss": 0.162, "step": 9592 }, { "epoch": 33.07931034482758, "grad_norm": 1.750604510307312, "learning_rate": 2.892505747126437e-05, "loss": 0.1873, "step": 9593 }, { "epoch": 33.08275862068965, "grad_norm": 1.2724244594573975, "learning_rate": 2.8924597701149428e-05, "loss": 0.1812, "step": 9594 }, { "epoch": 33.08620689655172, "grad_norm": 1.5253247022628784, "learning_rate": 2.8924137931034483e-05, "loss": 0.2044, "step": 9595 }, { "epoch": 33.08965517241379, "grad_norm": 0.5414915084838867, "learning_rate": 2.892367816091954e-05, "loss": 0.2324, "step": 9596 }, { "epoch": 33.09310344827586, "grad_norm": 0.574743390083313, "learning_rate": 2.89232183908046e-05, "loss": 0.2077, "step": 9597 }, { "epoch": 33.09655172413793, "grad_norm": 0.5785346627235413, "learning_rate": 2.8922758620689656e-05, "loss": 0.2517, "step": 9598 }, { "epoch": 33.1, "grad_norm": 0.8365843892097473, "learning_rate": 2.8922298850574714e-05, "loss": 0.1893, "step": 9599 }, { "epoch": 33.10344827586207, "grad_norm": 0.6397738456726074, "learning_rate": 2.892183908045977e-05, "loss": 0.2136, "step": 9600 }, { "epoch": 33.10689655172414, "grad_norm": 1.1830188035964966, "learning_rate": 2.8921379310344828e-05, "loss": 0.2066, "step": 9601 }, { "epoch": 33.110344827586204, "grad_norm": 0.8323506116867065, "learning_rate": 2.8920919540229887e-05, "loss": 0.1908, "step": 9602 }, { "epoch": 33.11379310344827, "grad_norm": 0.6663456559181213, "learning_rate": 2.8920459770114942e-05, "loss": 0.2149, "step": 9603 }, { "epoch": 33.11724137931034, "grad_norm": 0.6532272100448608, "learning_rate": 2.892e-05, "loss": 0.2101, "step": 9604 }, { "epoch": 33.12068965517241, "grad_norm": 0.7396142482757568, "learning_rate": 2.891954022988506e-05, "loss": 0.1888, "step": 9605 }, { "epoch": 33.12413793103448, "grad_norm": 1.1619234085083008, "learning_rate": 2.8919080459770115e-05, "loss": 0.2028, "step": 9606 }, { "epoch": 33.12758620689655, "grad_norm": 1.6061958074569702, "learning_rate": 2.8918620689655174e-05, "loss": 0.1809, "step": 9607 }, { "epoch": 33.13103448275862, "grad_norm": 0.9135673642158508, "learning_rate": 2.891816091954023e-05, "loss": 0.1798, "step": 9608 }, { "epoch": 33.13448275862069, "grad_norm": 1.717264175415039, "learning_rate": 2.891770114942529e-05, "loss": 0.1697, "step": 9609 }, { "epoch": 33.13793103448276, "grad_norm": 3.240902900695801, "learning_rate": 2.8917241379310346e-05, "loss": 0.2043, "step": 9610 }, { "epoch": 33.141379310344824, "grad_norm": 0.7872964143753052, "learning_rate": 2.89167816091954e-05, "loss": 0.1872, "step": 9611 }, { "epoch": 33.144827586206894, "grad_norm": 1.5371822118759155, "learning_rate": 2.891632183908046e-05, "loss": 0.1657, "step": 9612 }, { "epoch": 33.148275862068964, "grad_norm": 0.9391132593154907, "learning_rate": 2.891586206896552e-05, "loss": 0.1929, "step": 9613 }, { "epoch": 33.15172413793103, "grad_norm": 0.7358919978141785, "learning_rate": 2.8915402298850578e-05, "loss": 0.1511, "step": 9614 }, { "epoch": 33.1551724137931, "grad_norm": 0.8662720918655396, "learning_rate": 2.8914942528735633e-05, "loss": 0.1641, "step": 9615 }, { "epoch": 33.15862068965517, "grad_norm": 0.7411629557609558, "learning_rate": 2.8914482758620688e-05, "loss": 0.1865, "step": 9616 }, { "epoch": 33.16206896551724, "grad_norm": 1.365781545639038, "learning_rate": 2.891402298850575e-05, "loss": 0.1929, "step": 9617 }, { "epoch": 33.16551724137931, "grad_norm": 1.3178009986877441, "learning_rate": 2.8913563218390806e-05, "loss": 0.1615, "step": 9618 }, { "epoch": 33.16896551724138, "grad_norm": 1.3100825548171997, "learning_rate": 2.8913103448275864e-05, "loss": 0.1839, "step": 9619 }, { "epoch": 33.172413793103445, "grad_norm": 1.1811543703079224, "learning_rate": 2.891264367816092e-05, "loss": 0.2762, "step": 9620 }, { "epoch": 33.175862068965515, "grad_norm": 0.6361193060874939, "learning_rate": 2.8912183908045978e-05, "loss": 0.2537, "step": 9621 }, { "epoch": 33.179310344827584, "grad_norm": 0.5988972783088684, "learning_rate": 2.8911724137931037e-05, "loss": 0.2605, "step": 9622 }, { "epoch": 33.182758620689654, "grad_norm": 0.8209428191184998, "learning_rate": 2.8911264367816092e-05, "loss": 0.2254, "step": 9623 }, { "epoch": 33.186206896551724, "grad_norm": 0.8152108788490295, "learning_rate": 2.891080459770115e-05, "loss": 0.2066, "step": 9624 }, { "epoch": 33.189655172413794, "grad_norm": 0.5834640860557556, "learning_rate": 2.891034482758621e-05, "loss": 0.1848, "step": 9625 }, { "epoch": 33.19310344827586, "grad_norm": 0.626183032989502, "learning_rate": 2.8909885057471265e-05, "loss": 0.2049, "step": 9626 }, { "epoch": 33.19655172413793, "grad_norm": 1.3690345287322998, "learning_rate": 2.8909425287356324e-05, "loss": 0.2036, "step": 9627 }, { "epoch": 33.2, "grad_norm": 1.286291241645813, "learning_rate": 2.890896551724138e-05, "loss": 0.204, "step": 9628 }, { "epoch": 33.203448275862065, "grad_norm": 1.036307692527771, "learning_rate": 2.8908505747126438e-05, "loss": 0.1712, "step": 9629 }, { "epoch": 33.206896551724135, "grad_norm": 0.6903652548789978, "learning_rate": 2.8908045977011496e-05, "loss": 0.1742, "step": 9630 }, { "epoch": 33.210344827586205, "grad_norm": 1.6285555362701416, "learning_rate": 2.890758620689655e-05, "loss": 0.2002, "step": 9631 }, { "epoch": 33.213793103448275, "grad_norm": 1.9625660181045532, "learning_rate": 2.890712643678161e-05, "loss": 0.1936, "step": 9632 }, { "epoch": 33.217241379310344, "grad_norm": 0.5512539148330688, "learning_rate": 2.890666666666667e-05, "loss": 0.1964, "step": 9633 }, { "epoch": 33.220689655172414, "grad_norm": 1.0025111436843872, "learning_rate": 2.8906206896551724e-05, "loss": 0.1695, "step": 9634 }, { "epoch": 33.224137931034484, "grad_norm": 1.1223630905151367, "learning_rate": 2.8905747126436783e-05, "loss": 0.1869, "step": 9635 }, { "epoch": 33.227586206896554, "grad_norm": 0.9386969804763794, "learning_rate": 2.8905287356321838e-05, "loss": 0.1868, "step": 9636 }, { "epoch": 33.23103448275862, "grad_norm": 1.0796303749084473, "learning_rate": 2.89048275862069e-05, "loss": 0.1909, "step": 9637 }, { "epoch": 33.234482758620686, "grad_norm": 1.797167420387268, "learning_rate": 2.8904367816091955e-05, "loss": 0.1838, "step": 9638 }, { "epoch": 33.237931034482756, "grad_norm": 1.0805320739746094, "learning_rate": 2.890390804597701e-05, "loss": 0.1652, "step": 9639 }, { "epoch": 33.241379310344826, "grad_norm": 0.6900908350944519, "learning_rate": 2.890344827586207e-05, "loss": 0.1635, "step": 9640 }, { "epoch": 33.244827586206895, "grad_norm": 0.9526169300079346, "learning_rate": 2.8902988505747128e-05, "loss": 0.1488, "step": 9641 }, { "epoch": 33.248275862068965, "grad_norm": 1.1175124645233154, "learning_rate": 2.8902528735632187e-05, "loss": 0.1512, "step": 9642 }, { "epoch": 33.251724137931035, "grad_norm": 0.8945664167404175, "learning_rate": 2.8902068965517242e-05, "loss": 0.1639, "step": 9643 }, { "epoch": 33.255172413793105, "grad_norm": 0.9483864307403564, "learning_rate": 2.8901609195402297e-05, "loss": 0.1713, "step": 9644 }, { "epoch": 33.258620689655174, "grad_norm": 1.1074272394180298, "learning_rate": 2.890114942528736e-05, "loss": 0.218, "step": 9645 }, { "epoch": 33.262068965517244, "grad_norm": 0.45640960335731506, "learning_rate": 2.8900689655172415e-05, "loss": 0.2506, "step": 9646 }, { "epoch": 33.265517241379314, "grad_norm": 0.5388578772544861, "learning_rate": 2.8900229885057473e-05, "loss": 0.2254, "step": 9647 }, { "epoch": 33.26896551724138, "grad_norm": 1.276506781578064, "learning_rate": 2.889977011494253e-05, "loss": 0.2227, "step": 9648 }, { "epoch": 33.272413793103446, "grad_norm": 0.5880995988845825, "learning_rate": 2.8899310344827587e-05, "loss": 0.25, "step": 9649 }, { "epoch": 33.275862068965516, "grad_norm": 1.0451574325561523, "learning_rate": 2.8898850574712646e-05, "loss": 0.2055, "step": 9650 }, { "epoch": 33.279310344827586, "grad_norm": 0.694435715675354, "learning_rate": 2.88983908045977e-05, "loss": 0.1973, "step": 9651 }, { "epoch": 33.282758620689656, "grad_norm": 1.57536780834198, "learning_rate": 2.889793103448276e-05, "loss": 0.1894, "step": 9652 }, { "epoch": 33.286206896551725, "grad_norm": 2.81054949760437, "learning_rate": 2.8897471264367815e-05, "loss": 0.2189, "step": 9653 }, { "epoch": 33.289655172413795, "grad_norm": 0.5786515474319458, "learning_rate": 2.8897011494252874e-05, "loss": 0.1708, "step": 9654 }, { "epoch": 33.293103448275865, "grad_norm": 0.877577543258667, "learning_rate": 2.8896551724137933e-05, "loss": 0.1935, "step": 9655 }, { "epoch": 33.296551724137935, "grad_norm": 0.8077136874198914, "learning_rate": 2.8896091954022988e-05, "loss": 0.198, "step": 9656 }, { "epoch": 33.3, "grad_norm": 1.0743334293365479, "learning_rate": 2.8895632183908047e-05, "loss": 0.1791, "step": 9657 }, { "epoch": 33.30344827586207, "grad_norm": 0.7441311478614807, "learning_rate": 2.8895172413793105e-05, "loss": 0.1829, "step": 9658 }, { "epoch": 33.30689655172414, "grad_norm": 0.8675210475921631, "learning_rate": 2.889471264367816e-05, "loss": 0.1788, "step": 9659 }, { "epoch": 33.310344827586206, "grad_norm": 0.5750216841697693, "learning_rate": 2.889425287356322e-05, "loss": 0.1785, "step": 9660 }, { "epoch": 33.313793103448276, "grad_norm": 0.6955519914627075, "learning_rate": 2.8893793103448275e-05, "loss": 0.187, "step": 9661 }, { "epoch": 33.317241379310346, "grad_norm": 0.5901178121566772, "learning_rate": 2.8893333333333333e-05, "loss": 0.159, "step": 9662 }, { "epoch": 33.320689655172416, "grad_norm": 0.5481398105621338, "learning_rate": 2.8892873563218392e-05, "loss": 0.1456, "step": 9663 }, { "epoch": 33.324137931034485, "grad_norm": 0.7284688949584961, "learning_rate": 2.8892413793103447e-05, "loss": 0.1844, "step": 9664 }, { "epoch": 33.327586206896555, "grad_norm": 1.2412265539169312, "learning_rate": 2.8891954022988506e-05, "loss": 0.1978, "step": 9665 }, { "epoch": 33.33103448275862, "grad_norm": 0.8700741529464722, "learning_rate": 2.8891494252873565e-05, "loss": 0.1588, "step": 9666 }, { "epoch": 33.33448275862069, "grad_norm": 1.106033444404602, "learning_rate": 2.889103448275862e-05, "loss": 0.1742, "step": 9667 }, { "epoch": 33.33793103448276, "grad_norm": 0.9638035893440247, "learning_rate": 2.889057471264368e-05, "loss": 0.1393, "step": 9668 }, { "epoch": 33.34137931034483, "grad_norm": 0.8943072557449341, "learning_rate": 2.8890114942528734e-05, "loss": 0.1983, "step": 9669 }, { "epoch": 33.3448275862069, "grad_norm": 1.3463001251220703, "learning_rate": 2.8889655172413796e-05, "loss": 0.2146, "step": 9670 }, { "epoch": 33.34827586206897, "grad_norm": 0.8534112572669983, "learning_rate": 2.888919540229885e-05, "loss": 0.2956, "step": 9671 }, { "epoch": 33.351724137931036, "grad_norm": 0.4232739508152008, "learning_rate": 2.8888735632183907e-05, "loss": 0.2109, "step": 9672 }, { "epoch": 33.355172413793106, "grad_norm": 0.5545104146003723, "learning_rate": 2.8888275862068965e-05, "loss": 0.2083, "step": 9673 }, { "epoch": 33.358620689655176, "grad_norm": 0.8016911745071411, "learning_rate": 2.8887816091954024e-05, "loss": 0.208, "step": 9674 }, { "epoch": 33.36206896551724, "grad_norm": 0.7121720314025879, "learning_rate": 2.8887356321839083e-05, "loss": 0.2096, "step": 9675 }, { "epoch": 33.36551724137931, "grad_norm": 1.000738501548767, "learning_rate": 2.8886896551724138e-05, "loss": 0.21, "step": 9676 }, { "epoch": 33.36896551724138, "grad_norm": 0.6779831647872925, "learning_rate": 2.8886436781609193e-05, "loss": 0.1783, "step": 9677 }, { "epoch": 33.37241379310345, "grad_norm": 0.5643300414085388, "learning_rate": 2.8885977011494255e-05, "loss": 0.185, "step": 9678 }, { "epoch": 33.37586206896552, "grad_norm": 0.8852789402008057, "learning_rate": 2.888551724137931e-05, "loss": 0.194, "step": 9679 }, { "epoch": 33.37931034482759, "grad_norm": 0.7445769309997559, "learning_rate": 2.888505747126437e-05, "loss": 0.1866, "step": 9680 }, { "epoch": 33.38275862068966, "grad_norm": 0.7867287397384644, "learning_rate": 2.8884597701149425e-05, "loss": 0.1906, "step": 9681 }, { "epoch": 33.38620689655173, "grad_norm": 0.7359448671340942, "learning_rate": 2.8884137931034483e-05, "loss": 0.1914, "step": 9682 }, { "epoch": 33.389655172413796, "grad_norm": 1.048401117324829, "learning_rate": 2.8883678160919542e-05, "loss": 0.1524, "step": 9683 }, { "epoch": 33.39310344827586, "grad_norm": 0.867267370223999, "learning_rate": 2.8883218390804597e-05, "loss": 0.1828, "step": 9684 }, { "epoch": 33.39655172413793, "grad_norm": 0.6623489260673523, "learning_rate": 2.8882758620689656e-05, "loss": 0.1753, "step": 9685 }, { "epoch": 33.4, "grad_norm": 0.6774595379829407, "learning_rate": 2.8882298850574715e-05, "loss": 0.1764, "step": 9686 }, { "epoch": 33.40344827586207, "grad_norm": 0.7014899253845215, "learning_rate": 2.888183908045977e-05, "loss": 0.1731, "step": 9687 }, { "epoch": 33.40689655172414, "grad_norm": 0.8435970544815063, "learning_rate": 2.888137931034483e-05, "loss": 0.1849, "step": 9688 }, { "epoch": 33.41034482758621, "grad_norm": 1.967657446861267, "learning_rate": 2.8880919540229884e-05, "loss": 0.1643, "step": 9689 }, { "epoch": 33.41379310344828, "grad_norm": 2.075807571411133, "learning_rate": 2.8880459770114943e-05, "loss": 0.1562, "step": 9690 }, { "epoch": 33.41724137931035, "grad_norm": 0.6808691620826721, "learning_rate": 2.888e-05, "loss": 0.1909, "step": 9691 }, { "epoch": 33.42068965517242, "grad_norm": 1.658255696296692, "learning_rate": 2.8879540229885057e-05, "loss": 0.1589, "step": 9692 }, { "epoch": 33.42413793103448, "grad_norm": 0.7257243394851685, "learning_rate": 2.8879080459770115e-05, "loss": 0.1595, "step": 9693 }, { "epoch": 33.42758620689655, "grad_norm": 1.2612384557724, "learning_rate": 2.8878620689655174e-05, "loss": 0.1673, "step": 9694 }, { "epoch": 33.43103448275862, "grad_norm": 1.0933191776275635, "learning_rate": 2.887816091954023e-05, "loss": 0.267, "step": 9695 }, { "epoch": 33.43448275862069, "grad_norm": 0.7329489588737488, "learning_rate": 2.8877701149425288e-05, "loss": 0.2697, "step": 9696 }, { "epoch": 33.43793103448276, "grad_norm": 0.6127701997756958, "learning_rate": 2.8877241379310343e-05, "loss": 0.2277, "step": 9697 }, { "epoch": 33.44137931034483, "grad_norm": 0.5252819657325745, "learning_rate": 2.8876781609195405e-05, "loss": 0.2258, "step": 9698 }, { "epoch": 33.4448275862069, "grad_norm": 0.6690179109573364, "learning_rate": 2.887632183908046e-05, "loss": 0.2085, "step": 9699 }, { "epoch": 33.44827586206897, "grad_norm": 0.964230477809906, "learning_rate": 2.8875862068965516e-05, "loss": 0.2125, "step": 9700 }, { "epoch": 33.45172413793104, "grad_norm": 0.6131202578544617, "learning_rate": 2.8875402298850575e-05, "loss": 0.1842, "step": 9701 }, { "epoch": 33.4551724137931, "grad_norm": 1.2405463457107544, "learning_rate": 2.8874942528735633e-05, "loss": 0.2028, "step": 9702 }, { "epoch": 33.45862068965517, "grad_norm": 2.746180295944214, "learning_rate": 2.8874482758620692e-05, "loss": 0.197, "step": 9703 }, { "epoch": 33.46206896551724, "grad_norm": 1.0537601709365845, "learning_rate": 2.8874022988505747e-05, "loss": 0.1796, "step": 9704 }, { "epoch": 33.46551724137931, "grad_norm": 0.4812438488006592, "learning_rate": 2.8873563218390803e-05, "loss": 0.1791, "step": 9705 }, { "epoch": 33.46896551724138, "grad_norm": 0.7979946732521057, "learning_rate": 2.8873103448275865e-05, "loss": 0.1787, "step": 9706 }, { "epoch": 33.47241379310345, "grad_norm": 0.5843731164932251, "learning_rate": 2.887264367816092e-05, "loss": 0.1975, "step": 9707 }, { "epoch": 33.47586206896552, "grad_norm": 1.4688050746917725, "learning_rate": 2.887218390804598e-05, "loss": 0.1744, "step": 9708 }, { "epoch": 33.47931034482759, "grad_norm": 2.1113359928131104, "learning_rate": 2.8871724137931034e-05, "loss": 0.1859, "step": 9709 }, { "epoch": 33.48275862068966, "grad_norm": 0.7175576090812683, "learning_rate": 2.8871264367816093e-05, "loss": 0.1802, "step": 9710 }, { "epoch": 33.48620689655172, "grad_norm": 1.0289180278778076, "learning_rate": 2.887080459770115e-05, "loss": 0.1676, "step": 9711 }, { "epoch": 33.48965517241379, "grad_norm": 0.8934184908866882, "learning_rate": 2.8870344827586207e-05, "loss": 0.1768, "step": 9712 }, { "epoch": 33.49310344827586, "grad_norm": 0.7981610894203186, "learning_rate": 2.8869885057471265e-05, "loss": 0.1923, "step": 9713 }, { "epoch": 33.49655172413793, "grad_norm": 0.7665585875511169, "learning_rate": 2.8869425287356324e-05, "loss": 0.1752, "step": 9714 }, { "epoch": 33.5, "grad_norm": 1.0478577613830566, "learning_rate": 2.886896551724138e-05, "loss": 0.1586, "step": 9715 }, { "epoch": 33.50344827586207, "grad_norm": 1.6155949831008911, "learning_rate": 2.8868505747126438e-05, "loss": 0.1591, "step": 9716 }, { "epoch": 33.50689655172414, "grad_norm": 0.824458122253418, "learning_rate": 2.8868045977011493e-05, "loss": 0.1686, "step": 9717 }, { "epoch": 33.51034482758621, "grad_norm": 1.1918895244598389, "learning_rate": 2.8867586206896552e-05, "loss": 0.1957, "step": 9718 }, { "epoch": 33.51379310344828, "grad_norm": 0.8346988558769226, "learning_rate": 2.886712643678161e-05, "loss": 0.166, "step": 9719 }, { "epoch": 33.51724137931034, "grad_norm": 1.394553303718567, "learning_rate": 2.8866666666666666e-05, "loss": 0.1971, "step": 9720 }, { "epoch": 33.52068965517241, "grad_norm": 0.7929716110229492, "learning_rate": 2.8866206896551725e-05, "loss": 0.2626, "step": 9721 }, { "epoch": 33.52413793103448, "grad_norm": 0.7093967199325562, "learning_rate": 2.8865747126436783e-05, "loss": 0.2115, "step": 9722 }, { "epoch": 33.52758620689655, "grad_norm": 0.5676634907722473, "learning_rate": 2.886528735632184e-05, "loss": 0.2163, "step": 9723 }, { "epoch": 33.53103448275862, "grad_norm": 1.3748940229415894, "learning_rate": 2.8864827586206897e-05, "loss": 0.1891, "step": 9724 }, { "epoch": 33.53448275862069, "grad_norm": 1.3149018287658691, "learning_rate": 2.8864367816091952e-05, "loss": 0.2184, "step": 9725 }, { "epoch": 33.53793103448276, "grad_norm": 0.6678007245063782, "learning_rate": 2.8863908045977015e-05, "loss": 0.2006, "step": 9726 }, { "epoch": 33.54137931034483, "grad_norm": 0.5346056222915649, "learning_rate": 2.886344827586207e-05, "loss": 0.1892, "step": 9727 }, { "epoch": 33.5448275862069, "grad_norm": 1.4789273738861084, "learning_rate": 2.8862988505747125e-05, "loss": 0.2035, "step": 9728 }, { "epoch": 33.54827586206896, "grad_norm": 0.8238654136657715, "learning_rate": 2.8862528735632184e-05, "loss": 0.1974, "step": 9729 }, { "epoch": 33.55172413793103, "grad_norm": 0.5472570657730103, "learning_rate": 2.8862068965517243e-05, "loss": 0.1881, "step": 9730 }, { "epoch": 33.5551724137931, "grad_norm": 0.9702746272087097, "learning_rate": 2.88616091954023e-05, "loss": 0.1868, "step": 9731 }, { "epoch": 33.55862068965517, "grad_norm": 2.01997971534729, "learning_rate": 2.8861149425287356e-05, "loss": 0.1902, "step": 9732 }, { "epoch": 33.56206896551724, "grad_norm": 1.0810606479644775, "learning_rate": 2.8860689655172412e-05, "loss": 0.1774, "step": 9733 }, { "epoch": 33.56551724137931, "grad_norm": 1.0965451002120972, "learning_rate": 2.8860229885057474e-05, "loss": 0.1709, "step": 9734 }, { "epoch": 33.56896551724138, "grad_norm": 0.7668156623840332, "learning_rate": 2.885977011494253e-05, "loss": 0.2265, "step": 9735 }, { "epoch": 33.57241379310345, "grad_norm": 0.6775228977203369, "learning_rate": 2.8859310344827588e-05, "loss": 0.1826, "step": 9736 }, { "epoch": 33.57586206896552, "grad_norm": 0.9053326845169067, "learning_rate": 2.8858850574712643e-05, "loss": 0.1749, "step": 9737 }, { "epoch": 33.57931034482758, "grad_norm": 0.8131545186042786, "learning_rate": 2.8858390804597702e-05, "loss": 0.1514, "step": 9738 }, { "epoch": 33.58275862068965, "grad_norm": 0.6908425092697144, "learning_rate": 2.885793103448276e-05, "loss": 0.2198, "step": 9739 }, { "epoch": 33.58620689655172, "grad_norm": 1.1857118606567383, "learning_rate": 2.8857471264367816e-05, "loss": 0.2003, "step": 9740 }, { "epoch": 33.58965517241379, "grad_norm": 1.2528432607650757, "learning_rate": 2.8857011494252874e-05, "loss": 0.1872, "step": 9741 }, { "epoch": 33.59310344827586, "grad_norm": 1.7893900871276855, "learning_rate": 2.8856551724137933e-05, "loss": 0.1662, "step": 9742 }, { "epoch": 33.59655172413793, "grad_norm": 0.9026615619659424, "learning_rate": 2.885609195402299e-05, "loss": 0.1587, "step": 9743 }, { "epoch": 33.6, "grad_norm": 0.8778151273727417, "learning_rate": 2.8855632183908047e-05, "loss": 0.1845, "step": 9744 }, { "epoch": 33.60344827586207, "grad_norm": 1.1299479007720947, "learning_rate": 2.8855172413793102e-05, "loss": 0.226, "step": 9745 }, { "epoch": 33.60689655172414, "grad_norm": 0.7029897570610046, "learning_rate": 2.885471264367816e-05, "loss": 0.2578, "step": 9746 }, { "epoch": 33.610344827586204, "grad_norm": 0.7513349652290344, "learning_rate": 2.885425287356322e-05, "loss": 0.2215, "step": 9747 }, { "epoch": 33.61379310344827, "grad_norm": 0.6532371640205383, "learning_rate": 2.8853793103448275e-05, "loss": 0.2456, "step": 9748 }, { "epoch": 33.61724137931034, "grad_norm": 0.7099756002426147, "learning_rate": 2.8853333333333334e-05, "loss": 0.2186, "step": 9749 }, { "epoch": 33.62068965517241, "grad_norm": 1.0474014282226562, "learning_rate": 2.8852873563218392e-05, "loss": 0.23, "step": 9750 }, { "epoch": 33.62413793103448, "grad_norm": 1.6557633876800537, "learning_rate": 2.8852413793103448e-05, "loss": 0.2035, "step": 9751 }, { "epoch": 33.62758620689655, "grad_norm": 0.7065911293029785, "learning_rate": 2.8851954022988506e-05, "loss": 0.2092, "step": 9752 }, { "epoch": 33.63103448275862, "grad_norm": 0.6159411072731018, "learning_rate": 2.8851494252873562e-05, "loss": 0.2146, "step": 9753 }, { "epoch": 33.63448275862069, "grad_norm": 1.476457953453064, "learning_rate": 2.8851034482758624e-05, "loss": 0.2057, "step": 9754 }, { "epoch": 33.63793103448276, "grad_norm": 0.888414204120636, "learning_rate": 2.885057471264368e-05, "loss": 0.1975, "step": 9755 }, { "epoch": 33.641379310344824, "grad_norm": 2.556244373321533, "learning_rate": 2.8850114942528734e-05, "loss": 0.1788, "step": 9756 }, { "epoch": 33.644827586206894, "grad_norm": 2.4135537147521973, "learning_rate": 2.8849655172413793e-05, "loss": 0.186, "step": 9757 }, { "epoch": 33.648275862068964, "grad_norm": 0.8117613196372986, "learning_rate": 2.8849195402298852e-05, "loss": 0.2128, "step": 9758 }, { "epoch": 33.65172413793103, "grad_norm": 0.7868035435676575, "learning_rate": 2.884873563218391e-05, "loss": 0.1634, "step": 9759 }, { "epoch": 33.6551724137931, "grad_norm": 0.7478260397911072, "learning_rate": 2.8848275862068966e-05, "loss": 0.1796, "step": 9760 }, { "epoch": 33.65862068965517, "grad_norm": 0.8077462315559387, "learning_rate": 2.884781609195402e-05, "loss": 0.1748, "step": 9761 }, { "epoch": 33.66206896551724, "grad_norm": 0.6758078932762146, "learning_rate": 2.8847356321839083e-05, "loss": 0.1726, "step": 9762 }, { "epoch": 33.66551724137931, "grad_norm": 0.8122615218162537, "learning_rate": 2.884689655172414e-05, "loss": 0.1698, "step": 9763 }, { "epoch": 33.66896551724138, "grad_norm": 1.1930707693099976, "learning_rate": 2.8846436781609197e-05, "loss": 0.1649, "step": 9764 }, { "epoch": 33.672413793103445, "grad_norm": 0.7543516159057617, "learning_rate": 2.8845977011494252e-05, "loss": 0.1676, "step": 9765 }, { "epoch": 33.675862068965515, "grad_norm": 0.7367312908172607, "learning_rate": 2.884551724137931e-05, "loss": 0.1959, "step": 9766 }, { "epoch": 33.679310344827584, "grad_norm": 2.0106327533721924, "learning_rate": 2.884505747126437e-05, "loss": 0.1607, "step": 9767 }, { "epoch": 33.682758620689654, "grad_norm": 0.6910700798034668, "learning_rate": 2.8844597701149425e-05, "loss": 0.1677, "step": 9768 }, { "epoch": 33.686206896551724, "grad_norm": 0.8207547664642334, "learning_rate": 2.8844137931034484e-05, "loss": 0.1899, "step": 9769 }, { "epoch": 33.689655172413794, "grad_norm": 2.6931309700012207, "learning_rate": 2.8843678160919542e-05, "loss": 0.2373, "step": 9770 }, { "epoch": 33.69310344827586, "grad_norm": 0.8819420337677002, "learning_rate": 2.8843218390804598e-05, "loss": 0.292, "step": 9771 }, { "epoch": 33.69655172413793, "grad_norm": 0.7613042593002319, "learning_rate": 2.8842758620689656e-05, "loss": 0.2234, "step": 9772 }, { "epoch": 33.7, "grad_norm": 0.8386480808258057, "learning_rate": 2.884229885057471e-05, "loss": 0.2295, "step": 9773 }, { "epoch": 33.703448275862065, "grad_norm": 1.0546468496322632, "learning_rate": 2.884183908045977e-05, "loss": 0.2262, "step": 9774 }, { "epoch": 33.706896551724135, "grad_norm": 1.0345025062561035, "learning_rate": 2.884137931034483e-05, "loss": 0.1897, "step": 9775 }, { "epoch": 33.710344827586205, "grad_norm": 0.7419296503067017, "learning_rate": 2.8840919540229884e-05, "loss": 0.2075, "step": 9776 }, { "epoch": 33.713793103448275, "grad_norm": 1.0288656949996948, "learning_rate": 2.8840459770114943e-05, "loss": 0.2427, "step": 9777 }, { "epoch": 33.717241379310344, "grad_norm": 1.4768476486206055, "learning_rate": 2.8840000000000002e-05, "loss": 0.2034, "step": 9778 }, { "epoch": 33.720689655172414, "grad_norm": 0.8775933980941772, "learning_rate": 2.8839540229885057e-05, "loss": 0.219, "step": 9779 }, { "epoch": 33.724137931034484, "grad_norm": 2.8938143253326416, "learning_rate": 2.8839080459770116e-05, "loss": 0.1964, "step": 9780 }, { "epoch": 33.727586206896554, "grad_norm": 2.0656898021698, "learning_rate": 2.883862068965517e-05, "loss": 0.1852, "step": 9781 }, { "epoch": 33.73103448275862, "grad_norm": 0.863023042678833, "learning_rate": 2.8838160919540233e-05, "loss": 0.2285, "step": 9782 }, { "epoch": 33.734482758620686, "grad_norm": 0.8957240581512451, "learning_rate": 2.883770114942529e-05, "loss": 0.1798, "step": 9783 }, { "epoch": 33.737931034482756, "grad_norm": 0.5429847836494446, "learning_rate": 2.8837241379310344e-05, "loss": 0.1747, "step": 9784 }, { "epoch": 33.741379310344826, "grad_norm": 0.8221002817153931, "learning_rate": 2.8836781609195402e-05, "loss": 0.201, "step": 9785 }, { "epoch": 33.744827586206895, "grad_norm": 1.714901089668274, "learning_rate": 2.883632183908046e-05, "loss": 0.1858, "step": 9786 }, { "epoch": 33.748275862068965, "grad_norm": 1.5465576648712158, "learning_rate": 2.883586206896552e-05, "loss": 0.1857, "step": 9787 }, { "epoch": 33.751724137931035, "grad_norm": 1.4065132141113281, "learning_rate": 2.8835402298850575e-05, "loss": 0.1764, "step": 9788 }, { "epoch": 33.755172413793105, "grad_norm": 0.8510115146636963, "learning_rate": 2.883494252873563e-05, "loss": 0.1808, "step": 9789 }, { "epoch": 33.758620689655174, "grad_norm": 1.2260890007019043, "learning_rate": 2.8834482758620692e-05, "loss": 0.1841, "step": 9790 }, { "epoch": 33.762068965517244, "grad_norm": 0.6745616793632507, "learning_rate": 2.8834022988505748e-05, "loss": 0.1735, "step": 9791 }, { "epoch": 33.765517241379314, "grad_norm": 1.1868795156478882, "learning_rate": 2.8833563218390806e-05, "loss": 0.175, "step": 9792 }, { "epoch": 33.76896551724138, "grad_norm": 1.1329526901245117, "learning_rate": 2.883310344827586e-05, "loss": 0.1843, "step": 9793 }, { "epoch": 33.772413793103446, "grad_norm": 1.2059067487716675, "learning_rate": 2.883264367816092e-05, "loss": 0.1779, "step": 9794 }, { "epoch": 33.775862068965516, "grad_norm": 1.5864415168762207, "learning_rate": 2.883218390804598e-05, "loss": 0.242, "step": 9795 }, { "epoch": 33.779310344827586, "grad_norm": 1.120413899421692, "learning_rate": 2.8831724137931034e-05, "loss": 0.2732, "step": 9796 }, { "epoch": 33.782758620689656, "grad_norm": 0.884088397026062, "learning_rate": 2.8831264367816093e-05, "loss": 0.2259, "step": 9797 }, { "epoch": 33.786206896551725, "grad_norm": 2.567138910293579, "learning_rate": 2.883080459770115e-05, "loss": 0.2096, "step": 9798 }, { "epoch": 33.789655172413795, "grad_norm": 0.7919902205467224, "learning_rate": 2.8830344827586207e-05, "loss": 0.1878, "step": 9799 }, { "epoch": 33.793103448275865, "grad_norm": 0.7200024724006653, "learning_rate": 2.8829885057471266e-05, "loss": 0.2178, "step": 9800 }, { "epoch": 33.796551724137935, "grad_norm": 1.925534725189209, "learning_rate": 2.882942528735632e-05, "loss": 0.215, "step": 9801 }, { "epoch": 33.8, "grad_norm": 0.9071018695831299, "learning_rate": 2.8828965517241383e-05, "loss": 0.2241, "step": 9802 }, { "epoch": 33.80344827586207, "grad_norm": 0.5845120549201965, "learning_rate": 2.8828505747126438e-05, "loss": 0.2034, "step": 9803 }, { "epoch": 33.80689655172414, "grad_norm": 0.5664554834365845, "learning_rate": 2.8828045977011494e-05, "loss": 0.1814, "step": 9804 }, { "epoch": 33.810344827586206, "grad_norm": 0.597687840461731, "learning_rate": 2.8827586206896552e-05, "loss": 0.1973, "step": 9805 }, { "epoch": 33.813793103448276, "grad_norm": 1.3111110925674438, "learning_rate": 2.882712643678161e-05, "loss": 0.1957, "step": 9806 }, { "epoch": 33.817241379310346, "grad_norm": 0.6985254883766174, "learning_rate": 2.8826666666666666e-05, "loss": 0.1718, "step": 9807 }, { "epoch": 33.820689655172416, "grad_norm": 2.1649670600891113, "learning_rate": 2.8826206896551725e-05, "loss": 0.1968, "step": 9808 }, { "epoch": 33.824137931034485, "grad_norm": 0.9668829441070557, "learning_rate": 2.882574712643678e-05, "loss": 0.1997, "step": 9809 }, { "epoch": 33.827586206896555, "grad_norm": 0.6731072068214417, "learning_rate": 2.8825287356321842e-05, "loss": 0.1863, "step": 9810 }, { "epoch": 33.83103448275862, "grad_norm": 0.9784933924674988, "learning_rate": 2.8824827586206898e-05, "loss": 0.1727, "step": 9811 }, { "epoch": 33.83448275862069, "grad_norm": 0.6944981217384338, "learning_rate": 2.8824367816091953e-05, "loss": 0.1714, "step": 9812 }, { "epoch": 33.83793103448276, "grad_norm": 1.2755732536315918, "learning_rate": 2.882390804597701e-05, "loss": 0.1914, "step": 9813 }, { "epoch": 33.84137931034483, "grad_norm": 1.158442497253418, "learning_rate": 2.882344827586207e-05, "loss": 0.1773, "step": 9814 }, { "epoch": 33.8448275862069, "grad_norm": 0.689573347568512, "learning_rate": 2.882298850574713e-05, "loss": 0.1792, "step": 9815 }, { "epoch": 33.84827586206897, "grad_norm": 0.784138023853302, "learning_rate": 2.8822528735632184e-05, "loss": 0.1717, "step": 9816 }, { "epoch": 33.851724137931036, "grad_norm": 1.3392319679260254, "learning_rate": 2.882206896551724e-05, "loss": 0.147, "step": 9817 }, { "epoch": 33.855172413793106, "grad_norm": 0.6486470699310303, "learning_rate": 2.88216091954023e-05, "loss": 0.1954, "step": 9818 }, { "epoch": 33.858620689655176, "grad_norm": 0.9220088720321655, "learning_rate": 2.8821149425287357e-05, "loss": 0.209, "step": 9819 }, { "epoch": 33.86206896551724, "grad_norm": 0.7960363030433655, "learning_rate": 2.8820689655172416e-05, "loss": 0.2245, "step": 9820 }, { "epoch": 33.86551724137931, "grad_norm": 0.6879355907440186, "learning_rate": 2.882022988505747e-05, "loss": 0.2204, "step": 9821 }, { "epoch": 33.86896551724138, "grad_norm": 0.5551002025604248, "learning_rate": 2.881977011494253e-05, "loss": 0.2111, "step": 9822 }, { "epoch": 33.87241379310345, "grad_norm": 0.7639089822769165, "learning_rate": 2.8819310344827588e-05, "loss": 0.2026, "step": 9823 }, { "epoch": 33.87586206896552, "grad_norm": 1.0064314603805542, "learning_rate": 2.8818850574712644e-05, "loss": 0.2119, "step": 9824 }, { "epoch": 33.87931034482759, "grad_norm": 1.553612232208252, "learning_rate": 2.8818390804597702e-05, "loss": 0.2074, "step": 9825 }, { "epoch": 33.88275862068966, "grad_norm": 1.9060453176498413, "learning_rate": 2.881793103448276e-05, "loss": 0.2086, "step": 9826 }, { "epoch": 33.88620689655173, "grad_norm": 1.201958417892456, "learning_rate": 2.8817471264367816e-05, "loss": 0.1934, "step": 9827 }, { "epoch": 33.889655172413796, "grad_norm": 0.6416969895362854, "learning_rate": 2.8817011494252875e-05, "loss": 0.2047, "step": 9828 }, { "epoch": 33.89310344827586, "grad_norm": 0.7920922636985779, "learning_rate": 2.881655172413793e-05, "loss": 0.2144, "step": 9829 }, { "epoch": 33.89655172413793, "grad_norm": 0.6220453977584839, "learning_rate": 2.8816091954022992e-05, "loss": 0.1952, "step": 9830 }, { "epoch": 33.9, "grad_norm": 0.8324376344680786, "learning_rate": 2.8815632183908048e-05, "loss": 0.181, "step": 9831 }, { "epoch": 33.90344827586207, "grad_norm": 1.0157201290130615, "learning_rate": 2.8815172413793103e-05, "loss": 0.1902, "step": 9832 }, { "epoch": 33.90689655172414, "grad_norm": 1.2109543085098267, "learning_rate": 2.881471264367816e-05, "loss": 0.1849, "step": 9833 }, { "epoch": 33.91034482758621, "grad_norm": 0.8903905153274536, "learning_rate": 2.881425287356322e-05, "loss": 0.2055, "step": 9834 }, { "epoch": 33.91379310344828, "grad_norm": 0.7983879446983337, "learning_rate": 2.8813793103448275e-05, "loss": 0.213, "step": 9835 }, { "epoch": 33.91724137931035, "grad_norm": 0.8037700653076172, "learning_rate": 2.8813333333333334e-05, "loss": 0.1763, "step": 9836 }, { "epoch": 33.92068965517242, "grad_norm": 0.592495858669281, "learning_rate": 2.881287356321839e-05, "loss": 0.1791, "step": 9837 }, { "epoch": 33.92413793103448, "grad_norm": 0.7488123178482056, "learning_rate": 2.881241379310345e-05, "loss": 0.1703, "step": 9838 }, { "epoch": 33.92758620689655, "grad_norm": 2.9999544620513916, "learning_rate": 2.8811954022988507e-05, "loss": 0.1664, "step": 9839 }, { "epoch": 33.93103448275862, "grad_norm": 1.0104271173477173, "learning_rate": 2.8811494252873562e-05, "loss": 0.1816, "step": 9840 }, { "epoch": 33.93448275862069, "grad_norm": 1.462147831916809, "learning_rate": 2.881103448275862e-05, "loss": 0.1758, "step": 9841 }, { "epoch": 33.93793103448276, "grad_norm": 0.8689472675323486, "learning_rate": 2.881057471264368e-05, "loss": 0.1777, "step": 9842 }, { "epoch": 33.94137931034483, "grad_norm": 1.1810986995697021, "learning_rate": 2.8810114942528738e-05, "loss": 0.1809, "step": 9843 }, { "epoch": 33.9448275862069, "grad_norm": 1.2584855556488037, "learning_rate": 2.8809655172413793e-05, "loss": 0.182, "step": 9844 }, { "epoch": 33.94827586206897, "grad_norm": 1.3383804559707642, "learning_rate": 2.880919540229885e-05, "loss": 0.2113, "step": 9845 }, { "epoch": 33.95172413793104, "grad_norm": 0.9847230315208435, "learning_rate": 2.880873563218391e-05, "loss": 0.2735, "step": 9846 }, { "epoch": 33.9551724137931, "grad_norm": 0.8118479251861572, "learning_rate": 2.8808275862068966e-05, "loss": 0.2163, "step": 9847 }, { "epoch": 33.95862068965517, "grad_norm": 0.6428956389427185, "learning_rate": 2.8807816091954025e-05, "loss": 0.22, "step": 9848 }, { "epoch": 33.96206896551724, "grad_norm": 0.8274654150009155, "learning_rate": 2.880735632183908e-05, "loss": 0.2165, "step": 9849 }, { "epoch": 33.96551724137931, "grad_norm": 1.3425673246383667, "learning_rate": 2.880689655172414e-05, "loss": 0.2091, "step": 9850 }, { "epoch": 33.96896551724138, "grad_norm": 0.8572213053703308, "learning_rate": 2.8806436781609197e-05, "loss": 0.1837, "step": 9851 }, { "epoch": 33.97241379310345, "grad_norm": 0.8811239004135132, "learning_rate": 2.8805977011494253e-05, "loss": 0.2104, "step": 9852 }, { "epoch": 33.97586206896552, "grad_norm": 0.7896023392677307, "learning_rate": 2.880551724137931e-05, "loss": 0.1962, "step": 9853 }, { "epoch": 33.97931034482759, "grad_norm": 1.013768196105957, "learning_rate": 2.880505747126437e-05, "loss": 0.2005, "step": 9854 }, { "epoch": 33.98275862068966, "grad_norm": 1.1056063175201416, "learning_rate": 2.8804597701149425e-05, "loss": 0.1821, "step": 9855 }, { "epoch": 33.98620689655172, "grad_norm": 1.0127379894256592, "learning_rate": 2.8804137931034484e-05, "loss": 0.1849, "step": 9856 }, { "epoch": 33.98965517241379, "grad_norm": 1.019081473350525, "learning_rate": 2.880367816091954e-05, "loss": 0.1752, "step": 9857 }, { "epoch": 33.99310344827586, "grad_norm": 0.7456434369087219, "learning_rate": 2.88032183908046e-05, "loss": 0.1813, "step": 9858 }, { "epoch": 33.99655172413793, "grad_norm": 1.2014214992523193, "learning_rate": 2.8802758620689657e-05, "loss": 0.2118, "step": 9859 }, { "epoch": 34.0, "grad_norm": 1.1327027082443237, "learning_rate": 2.8802298850574712e-05, "loss": 0.2685, "step": 9860 }, { "epoch": 34.00344827586207, "grad_norm": 0.48637744784355164, "learning_rate": 2.880183908045977e-05, "loss": 0.2681, "step": 9861 }, { "epoch": 34.00689655172414, "grad_norm": 0.6307554841041565, "learning_rate": 2.880137931034483e-05, "loss": 0.2334, "step": 9862 }, { "epoch": 34.01034482758621, "grad_norm": 0.5292021632194519, "learning_rate": 2.8800919540229885e-05, "loss": 0.2223, "step": 9863 }, { "epoch": 34.01379310344828, "grad_norm": 0.8379344344139099, "learning_rate": 2.8800459770114943e-05, "loss": 0.2035, "step": 9864 }, { "epoch": 34.01724137931034, "grad_norm": 1.3977580070495605, "learning_rate": 2.88e-05, "loss": 0.2132, "step": 9865 }, { "epoch": 34.02068965517241, "grad_norm": 0.6072829961776733, "learning_rate": 2.879954022988506e-05, "loss": 0.191, "step": 9866 }, { "epoch": 34.02413793103448, "grad_norm": 2.4567954540252686, "learning_rate": 2.8799080459770116e-05, "loss": 0.2323, "step": 9867 }, { "epoch": 34.02758620689655, "grad_norm": 0.7026530504226685, "learning_rate": 2.879862068965517e-05, "loss": 0.2133, "step": 9868 }, { "epoch": 34.03103448275862, "grad_norm": 0.60567307472229, "learning_rate": 2.879816091954023e-05, "loss": 0.1977, "step": 9869 }, { "epoch": 34.03448275862069, "grad_norm": 2.12383770942688, "learning_rate": 2.879770114942529e-05, "loss": 0.2021, "step": 9870 }, { "epoch": 34.03793103448276, "grad_norm": 2.6827261447906494, "learning_rate": 2.8797241379310347e-05, "loss": 0.2036, "step": 9871 }, { "epoch": 34.04137931034483, "grad_norm": 1.9849978685379028, "learning_rate": 2.8796781609195403e-05, "loss": 0.19, "step": 9872 }, { "epoch": 34.0448275862069, "grad_norm": 1.2461453676223755, "learning_rate": 2.8796321839080458e-05, "loss": 0.1943, "step": 9873 }, { "epoch": 34.04827586206896, "grad_norm": 0.9795314073562622, "learning_rate": 2.879586206896552e-05, "loss": 0.1947, "step": 9874 }, { "epoch": 34.05172413793103, "grad_norm": 2.008769989013672, "learning_rate": 2.8795402298850575e-05, "loss": 0.21, "step": 9875 }, { "epoch": 34.0551724137931, "grad_norm": 0.921816349029541, "learning_rate": 2.8794942528735634e-05, "loss": 0.178, "step": 9876 }, { "epoch": 34.05862068965517, "grad_norm": 2.318197011947632, "learning_rate": 2.879448275862069e-05, "loss": 0.1838, "step": 9877 }, { "epoch": 34.06206896551724, "grad_norm": 1.3046164512634277, "learning_rate": 2.8794022988505748e-05, "loss": 0.2056, "step": 9878 }, { "epoch": 34.06551724137931, "grad_norm": 1.2755804061889648, "learning_rate": 2.8793563218390807e-05, "loss": 0.1808, "step": 9879 }, { "epoch": 34.06896551724138, "grad_norm": 0.952276349067688, "learning_rate": 2.8793103448275862e-05, "loss": 0.1926, "step": 9880 }, { "epoch": 34.07241379310345, "grad_norm": 2.2954304218292236, "learning_rate": 2.879264367816092e-05, "loss": 0.2118, "step": 9881 }, { "epoch": 34.07586206896552, "grad_norm": 0.9489737749099731, "learning_rate": 2.879218390804598e-05, "loss": 0.1625, "step": 9882 }, { "epoch": 34.07931034482758, "grad_norm": 1.4279160499572754, "learning_rate": 2.8791724137931035e-05, "loss": 0.1775, "step": 9883 }, { "epoch": 34.08275862068965, "grad_norm": 1.4169663190841675, "learning_rate": 2.8791264367816093e-05, "loss": 0.1696, "step": 9884 }, { "epoch": 34.08620689655172, "grad_norm": 1.6317484378814697, "learning_rate": 2.879080459770115e-05, "loss": 0.2181, "step": 9885 }, { "epoch": 34.08965517241379, "grad_norm": 1.1207246780395508, "learning_rate": 2.879034482758621e-05, "loss": 0.2464, "step": 9886 }, { "epoch": 34.09310344827586, "grad_norm": 0.6526554822921753, "learning_rate": 2.8789885057471266e-05, "loss": 0.2124, "step": 9887 }, { "epoch": 34.09655172413793, "grad_norm": 1.0911976099014282, "learning_rate": 2.878942528735632e-05, "loss": 0.2145, "step": 9888 }, { "epoch": 34.1, "grad_norm": 1.2130929231643677, "learning_rate": 2.878896551724138e-05, "loss": 0.1994, "step": 9889 }, { "epoch": 34.10344827586207, "grad_norm": 0.9097170233726501, "learning_rate": 2.878850574712644e-05, "loss": 0.2338, "step": 9890 }, { "epoch": 34.10689655172414, "grad_norm": 0.955073893070221, "learning_rate": 2.8788045977011497e-05, "loss": 0.208, "step": 9891 }, { "epoch": 34.110344827586204, "grad_norm": 0.8229780793190002, "learning_rate": 2.8787586206896553e-05, "loss": 0.1995, "step": 9892 }, { "epoch": 34.11379310344827, "grad_norm": 0.7837929129600525, "learning_rate": 2.8787126436781608e-05, "loss": 0.2162, "step": 9893 }, { "epoch": 34.11724137931034, "grad_norm": 1.171244740486145, "learning_rate": 2.878666666666667e-05, "loss": 0.2071, "step": 9894 }, { "epoch": 34.12068965517241, "grad_norm": 1.3480098247528076, "learning_rate": 2.8786206896551725e-05, "loss": 0.2162, "step": 9895 }, { "epoch": 34.12413793103448, "grad_norm": 0.6665021181106567, "learning_rate": 2.878574712643678e-05, "loss": 0.2002, "step": 9896 }, { "epoch": 34.12758620689655, "grad_norm": 1.0096769332885742, "learning_rate": 2.878528735632184e-05, "loss": 0.2012, "step": 9897 }, { "epoch": 34.13103448275862, "grad_norm": 0.7041345834732056, "learning_rate": 2.8784827586206898e-05, "loss": 0.1996, "step": 9898 }, { "epoch": 34.13448275862069, "grad_norm": 0.6815746426582336, "learning_rate": 2.8784367816091957e-05, "loss": 0.1773, "step": 9899 }, { "epoch": 34.13793103448276, "grad_norm": 2.300588369369507, "learning_rate": 2.8783908045977012e-05, "loss": 0.2218, "step": 9900 }, { "epoch": 34.141379310344824, "grad_norm": 0.8636860251426697, "learning_rate": 2.8783448275862067e-05, "loss": 0.1758, "step": 9901 }, { "epoch": 34.144827586206894, "grad_norm": 0.9691575169563293, "learning_rate": 2.878298850574713e-05, "loss": 0.1617, "step": 9902 }, { "epoch": 34.148275862068964, "grad_norm": 1.7071503400802612, "learning_rate": 2.8782528735632185e-05, "loss": 0.1878, "step": 9903 }, { "epoch": 34.15172413793103, "grad_norm": 1.2981455326080322, "learning_rate": 2.8782068965517243e-05, "loss": 0.1813, "step": 9904 }, { "epoch": 34.1551724137931, "grad_norm": 1.0507404804229736, "learning_rate": 2.87816091954023e-05, "loss": 0.1689, "step": 9905 }, { "epoch": 34.15862068965517, "grad_norm": 0.8420398235321045, "learning_rate": 2.8781149425287357e-05, "loss": 0.169, "step": 9906 }, { "epoch": 34.16206896551724, "grad_norm": 0.6602725982666016, "learning_rate": 2.8780689655172416e-05, "loss": 0.1546, "step": 9907 }, { "epoch": 34.16551724137931, "grad_norm": 2.536552906036377, "learning_rate": 2.878022988505747e-05, "loss": 0.1574, "step": 9908 }, { "epoch": 34.16896551724138, "grad_norm": 1.4348573684692383, "learning_rate": 2.877977011494253e-05, "loss": 0.2112, "step": 9909 }, { "epoch": 34.172413793103445, "grad_norm": 2.130295515060425, "learning_rate": 2.877931034482759e-05, "loss": 0.2449, "step": 9910 }, { "epoch": 34.175862068965515, "grad_norm": 0.8195633292198181, "learning_rate": 2.8778850574712644e-05, "loss": 0.2588, "step": 9911 }, { "epoch": 34.179310344827584, "grad_norm": 0.5397291779518127, "learning_rate": 2.8778390804597703e-05, "loss": 0.2365, "step": 9912 }, { "epoch": 34.182758620689654, "grad_norm": 2.5220577716827393, "learning_rate": 2.8777931034482758e-05, "loss": 0.2391, "step": 9913 }, { "epoch": 34.186206896551724, "grad_norm": 0.6261705756187439, "learning_rate": 2.877747126436782e-05, "loss": 0.2314, "step": 9914 }, { "epoch": 34.189655172413794, "grad_norm": 0.843756377696991, "learning_rate": 2.8777011494252875e-05, "loss": 0.189, "step": 9915 }, { "epoch": 34.19310344827586, "grad_norm": 0.8289269804954529, "learning_rate": 2.877655172413793e-05, "loss": 0.2114, "step": 9916 }, { "epoch": 34.19655172413793, "grad_norm": 1.8139618635177612, "learning_rate": 2.877609195402299e-05, "loss": 0.1977, "step": 9917 }, { "epoch": 34.2, "grad_norm": 1.3798147439956665, "learning_rate": 2.8775632183908048e-05, "loss": 0.1636, "step": 9918 }, { "epoch": 34.203448275862065, "grad_norm": 0.8564201593399048, "learning_rate": 2.8775172413793107e-05, "loss": 0.2249, "step": 9919 }, { "epoch": 34.206896551724135, "grad_norm": 1.645369291305542, "learning_rate": 2.8774712643678162e-05, "loss": 0.1761, "step": 9920 }, { "epoch": 34.210344827586205, "grad_norm": 0.7405110001564026, "learning_rate": 2.8774252873563217e-05, "loss": 0.1677, "step": 9921 }, { "epoch": 34.213793103448275, "grad_norm": 1.4860831499099731, "learning_rate": 2.877379310344828e-05, "loss": 0.1877, "step": 9922 }, { "epoch": 34.217241379310344, "grad_norm": 0.5500933527946472, "learning_rate": 2.8773333333333335e-05, "loss": 0.1704, "step": 9923 }, { "epoch": 34.220689655172414, "grad_norm": 1.4756823778152466, "learning_rate": 2.877287356321839e-05, "loss": 0.1801, "step": 9924 }, { "epoch": 34.224137931034484, "grad_norm": 1.0833911895751953, "learning_rate": 2.877241379310345e-05, "loss": 0.1878, "step": 9925 }, { "epoch": 34.227586206896554, "grad_norm": 0.8441585898399353, "learning_rate": 2.8771954022988507e-05, "loss": 0.1978, "step": 9926 }, { "epoch": 34.23103448275862, "grad_norm": 1.1542601585388184, "learning_rate": 2.8771494252873566e-05, "loss": 0.1752, "step": 9927 }, { "epoch": 34.234482758620686, "grad_norm": 1.0355768203735352, "learning_rate": 2.877103448275862e-05, "loss": 0.1668, "step": 9928 }, { "epoch": 34.237931034482756, "grad_norm": 0.8722745776176453, "learning_rate": 2.8770574712643676e-05, "loss": 0.1756, "step": 9929 }, { "epoch": 34.241379310344826, "grad_norm": 0.6023699045181274, "learning_rate": 2.877011494252874e-05, "loss": 0.1646, "step": 9930 }, { "epoch": 34.244827586206895, "grad_norm": 1.3650999069213867, "learning_rate": 2.8769655172413794e-05, "loss": 0.1921, "step": 9931 }, { "epoch": 34.248275862068965, "grad_norm": 1.115057349205017, "learning_rate": 2.8769195402298853e-05, "loss": 0.1659, "step": 9932 }, { "epoch": 34.251724137931035, "grad_norm": 1.8151479959487915, "learning_rate": 2.8768735632183908e-05, "loss": 0.1599, "step": 9933 }, { "epoch": 34.255172413793105, "grad_norm": 1.09548020362854, "learning_rate": 2.8768275862068967e-05, "loss": 0.1537, "step": 9934 }, { "epoch": 34.258620689655174, "grad_norm": 1.2160685062408447, "learning_rate": 2.8767816091954025e-05, "loss": 0.2305, "step": 9935 }, { "epoch": 34.262068965517244, "grad_norm": 0.9199950098991394, "learning_rate": 2.876735632183908e-05, "loss": 0.2667, "step": 9936 }, { "epoch": 34.265517241379314, "grad_norm": 0.7703959345817566, "learning_rate": 2.876689655172414e-05, "loss": 0.2203, "step": 9937 }, { "epoch": 34.26896551724138, "grad_norm": 2.092341423034668, "learning_rate": 2.8766436781609198e-05, "loss": 0.2079, "step": 9938 }, { "epoch": 34.272413793103446, "grad_norm": 1.5905927419662476, "learning_rate": 2.8765977011494253e-05, "loss": 0.2079, "step": 9939 }, { "epoch": 34.275862068965516, "grad_norm": 0.6976174712181091, "learning_rate": 2.8765517241379312e-05, "loss": 0.1958, "step": 9940 }, { "epoch": 34.279310344827586, "grad_norm": 2.731640338897705, "learning_rate": 2.8765057471264367e-05, "loss": 0.1993, "step": 9941 }, { "epoch": 34.282758620689656, "grad_norm": 0.6177347898483276, "learning_rate": 2.876459770114943e-05, "loss": 0.1935, "step": 9942 }, { "epoch": 34.286206896551725, "grad_norm": 0.7521161437034607, "learning_rate": 2.8764137931034484e-05, "loss": 0.1853, "step": 9943 }, { "epoch": 34.289655172413795, "grad_norm": 0.9157514572143555, "learning_rate": 2.876367816091954e-05, "loss": 0.1834, "step": 9944 }, { "epoch": 34.293103448275865, "grad_norm": 0.731343686580658, "learning_rate": 2.87632183908046e-05, "loss": 0.1924, "step": 9945 }, { "epoch": 34.296551724137935, "grad_norm": 0.8868173360824585, "learning_rate": 2.8762758620689657e-05, "loss": 0.1796, "step": 9946 }, { "epoch": 34.3, "grad_norm": 1.106668472290039, "learning_rate": 2.8762298850574716e-05, "loss": 0.2069, "step": 9947 }, { "epoch": 34.30344827586207, "grad_norm": 0.6917393803596497, "learning_rate": 2.876183908045977e-05, "loss": 0.1724, "step": 9948 }, { "epoch": 34.30689655172414, "grad_norm": 0.6563499569892883, "learning_rate": 2.8761379310344826e-05, "loss": 0.1766, "step": 9949 }, { "epoch": 34.310344827586206, "grad_norm": 0.66151362657547, "learning_rate": 2.8760919540229885e-05, "loss": 0.1713, "step": 9950 }, { "epoch": 34.313793103448276, "grad_norm": 0.770255446434021, "learning_rate": 2.8760459770114944e-05, "loss": 0.1781, "step": 9951 }, { "epoch": 34.317241379310346, "grad_norm": 0.5773349404335022, "learning_rate": 2.876e-05, "loss": 0.1751, "step": 9952 }, { "epoch": 34.320689655172416, "grad_norm": 0.6322439312934875, "learning_rate": 2.8759540229885058e-05, "loss": 0.1592, "step": 9953 }, { "epoch": 34.324137931034485, "grad_norm": 1.8881258964538574, "learning_rate": 2.8759080459770113e-05, "loss": 0.1591, "step": 9954 }, { "epoch": 34.327586206896555, "grad_norm": 0.6433152556419373, "learning_rate": 2.8758620689655175e-05, "loss": 0.1618, "step": 9955 }, { "epoch": 34.33103448275862, "grad_norm": 0.7957774996757507, "learning_rate": 2.875816091954023e-05, "loss": 0.1602, "step": 9956 }, { "epoch": 34.33448275862069, "grad_norm": 1.409785509109497, "learning_rate": 2.8757701149425286e-05, "loss": 0.1572, "step": 9957 }, { "epoch": 34.33793103448276, "grad_norm": 1.0032455921173096, "learning_rate": 2.8757241379310344e-05, "loss": 0.1629, "step": 9958 }, { "epoch": 34.34137931034483, "grad_norm": 1.0930094718933105, "learning_rate": 2.8756781609195403e-05, "loss": 0.1901, "step": 9959 }, { "epoch": 34.3448275862069, "grad_norm": 1.1566962003707886, "learning_rate": 2.8756321839080462e-05, "loss": 0.2264, "step": 9960 }, { "epoch": 34.34827586206897, "grad_norm": 0.5088309645652771, "learning_rate": 2.8755862068965517e-05, "loss": 0.2457, "step": 9961 }, { "epoch": 34.351724137931036, "grad_norm": 1.0747133493423462, "learning_rate": 2.8755402298850572e-05, "loss": 0.2367, "step": 9962 }, { "epoch": 34.355172413793106, "grad_norm": 1.0331971645355225, "learning_rate": 2.8754942528735634e-05, "loss": 0.2104, "step": 9963 }, { "epoch": 34.358620689655176, "grad_norm": 0.6274105310440063, "learning_rate": 2.875448275862069e-05, "loss": 0.2315, "step": 9964 }, { "epoch": 34.36206896551724, "grad_norm": 0.9033324718475342, "learning_rate": 2.875402298850575e-05, "loss": 0.2189, "step": 9965 }, { "epoch": 34.36551724137931, "grad_norm": 0.9541386961936951, "learning_rate": 2.8753563218390804e-05, "loss": 0.1908, "step": 9966 }, { "epoch": 34.36896551724138, "grad_norm": 0.7749610543251038, "learning_rate": 2.8753103448275862e-05, "loss": 0.1917, "step": 9967 }, { "epoch": 34.37241379310345, "grad_norm": 1.2126247882843018, "learning_rate": 2.875264367816092e-05, "loss": 0.1904, "step": 9968 }, { "epoch": 34.37586206896552, "grad_norm": 0.5838993191719055, "learning_rate": 2.8752183908045976e-05, "loss": 0.1742, "step": 9969 }, { "epoch": 34.37931034482759, "grad_norm": 1.040053129196167, "learning_rate": 2.8751724137931035e-05, "loss": 0.1709, "step": 9970 }, { "epoch": 34.38275862068966, "grad_norm": 0.9044457077980042, "learning_rate": 2.8751264367816094e-05, "loss": 0.1922, "step": 9971 }, { "epoch": 34.38620689655173, "grad_norm": 0.758162796497345, "learning_rate": 2.875080459770115e-05, "loss": 0.1888, "step": 9972 }, { "epoch": 34.389655172413796, "grad_norm": 0.6668024063110352, "learning_rate": 2.8750344827586208e-05, "loss": 0.1955, "step": 9973 }, { "epoch": 34.39310344827586, "grad_norm": 0.9939645528793335, "learning_rate": 2.8749885057471263e-05, "loss": 0.1902, "step": 9974 }, { "epoch": 34.39655172413793, "grad_norm": 0.9312494397163391, "learning_rate": 2.8749425287356325e-05, "loss": 0.1855, "step": 9975 }, { "epoch": 34.4, "grad_norm": 0.9590523838996887, "learning_rate": 2.874896551724138e-05, "loss": 0.2099, "step": 9976 }, { "epoch": 34.40344827586207, "grad_norm": 0.7098699808120728, "learning_rate": 2.8748505747126436e-05, "loss": 0.1474, "step": 9977 }, { "epoch": 34.40689655172414, "grad_norm": 5.125970363616943, "learning_rate": 2.8748045977011494e-05, "loss": 0.1733, "step": 9978 }, { "epoch": 34.41034482758621, "grad_norm": 0.7104945778846741, "learning_rate": 2.8747586206896553e-05, "loss": 0.1747, "step": 9979 }, { "epoch": 34.41379310344828, "grad_norm": 1.0348694324493408, "learning_rate": 2.874712643678161e-05, "loss": 0.1977, "step": 9980 }, { "epoch": 34.41724137931035, "grad_norm": 0.8044525980949402, "learning_rate": 2.8746666666666667e-05, "loss": 0.1859, "step": 9981 }, { "epoch": 34.42068965517242, "grad_norm": 1.3749035596847534, "learning_rate": 2.8746206896551722e-05, "loss": 0.141, "step": 9982 }, { "epoch": 34.42413793103448, "grad_norm": 4.69795036315918, "learning_rate": 2.8745747126436784e-05, "loss": 0.1651, "step": 9983 }, { "epoch": 34.42758620689655, "grad_norm": 1.1310187578201294, "learning_rate": 2.874528735632184e-05, "loss": 0.1803, "step": 9984 }, { "epoch": 34.43103448275862, "grad_norm": 3.548145055770874, "learning_rate": 2.8744827586206895e-05, "loss": 0.2296, "step": 9985 }, { "epoch": 34.43448275862069, "grad_norm": 0.9603576064109802, "learning_rate": 2.8744367816091954e-05, "loss": 0.2596, "step": 9986 }, { "epoch": 34.43793103448276, "grad_norm": 0.7253764867782593, "learning_rate": 2.8743908045977012e-05, "loss": 0.206, "step": 9987 }, { "epoch": 34.44137931034483, "grad_norm": 2.1516404151916504, "learning_rate": 2.874344827586207e-05, "loss": 0.201, "step": 9988 }, { "epoch": 34.4448275862069, "grad_norm": 0.7371857762336731, "learning_rate": 2.8742988505747126e-05, "loss": 0.2008, "step": 9989 }, { "epoch": 34.44827586206897, "grad_norm": 0.658717155456543, "learning_rate": 2.874252873563218e-05, "loss": 0.191, "step": 9990 }, { "epoch": 34.45172413793104, "grad_norm": 1.1458375453948975, "learning_rate": 2.8742068965517244e-05, "loss": 0.2341, "step": 9991 }, { "epoch": 34.4551724137931, "grad_norm": 0.9222790598869324, "learning_rate": 2.87416091954023e-05, "loss": 0.21, "step": 9992 }, { "epoch": 34.45862068965517, "grad_norm": 1.2436450719833374, "learning_rate": 2.8741149425287358e-05, "loss": 0.2092, "step": 9993 }, { "epoch": 34.46206896551724, "grad_norm": 0.7212373614311218, "learning_rate": 2.8740689655172413e-05, "loss": 0.1986, "step": 9994 }, { "epoch": 34.46551724137931, "grad_norm": 0.7816070318222046, "learning_rate": 2.874022988505747e-05, "loss": 0.1973, "step": 9995 }, { "epoch": 34.46896551724138, "grad_norm": 0.6843519806861877, "learning_rate": 2.873977011494253e-05, "loss": 0.1765, "step": 9996 }, { "epoch": 34.47241379310345, "grad_norm": 1.180918574333191, "learning_rate": 2.8739310344827586e-05, "loss": 0.1967, "step": 9997 }, { "epoch": 34.47586206896552, "grad_norm": 0.9182568788528442, "learning_rate": 2.8738850574712644e-05, "loss": 0.1684, "step": 9998 }, { "epoch": 34.47931034482759, "grad_norm": 0.6228621602058411, "learning_rate": 2.8738390804597703e-05, "loss": 0.1819, "step": 9999 }, { "epoch": 34.48275862068966, "grad_norm": 0.5449795722961426, "learning_rate": 2.8737931034482758e-05, "loss": 0.1852, "step": 10000 }, { "epoch": 34.48275862068966, "eval_cer": 0.13260651047707414, "eval_loss": 0.3291938006877899, "eval_runtime": 19.4831, "eval_samples_per_second": 47.58, "eval_steps_per_second": 0.154, "eval_wer": 0.30667701863354035, "step": 10000 }, { "epoch": 34.48620689655172, "grad_norm": 0.7385279536247253, "learning_rate": 2.8737471264367817e-05, "loss": 0.1674, "step": 10001 }, { "epoch": 34.48965517241379, "grad_norm": 0.8361315727233887, "learning_rate": 2.8737011494252872e-05, "loss": 0.1727, "step": 10002 }, { "epoch": 34.49310344827586, "grad_norm": 0.9729140996932983, "learning_rate": 2.8736551724137934e-05, "loss": 0.1755, "step": 10003 }, { "epoch": 34.49655172413793, "grad_norm": 0.7011030912399292, "learning_rate": 2.873609195402299e-05, "loss": 0.1572, "step": 10004 }, { "epoch": 34.5, "grad_norm": 0.6613340377807617, "learning_rate": 2.8735632183908045e-05, "loss": 0.1719, "step": 10005 }, { "epoch": 34.50344827586207, "grad_norm": 0.816606342792511, "learning_rate": 2.8735172413793104e-05, "loss": 0.152, "step": 10006 }, { "epoch": 34.50689655172414, "grad_norm": 1.038905382156372, "learning_rate": 2.8734712643678162e-05, "loss": 0.1724, "step": 10007 }, { "epoch": 34.51034482758621, "grad_norm": 0.7747455835342407, "learning_rate": 2.873425287356322e-05, "loss": 0.1646, "step": 10008 }, { "epoch": 34.51379310344828, "grad_norm": 0.8553284406661987, "learning_rate": 2.8733793103448276e-05, "loss": 0.1921, "step": 10009 }, { "epoch": 34.51724137931034, "grad_norm": 1.1590427160263062, "learning_rate": 2.873333333333333e-05, "loss": 0.1899, "step": 10010 }, { "epoch": 34.52068965517241, "grad_norm": 0.7357019782066345, "learning_rate": 2.8732873563218394e-05, "loss": 0.2469, "step": 10011 }, { "epoch": 34.52413793103448, "grad_norm": 0.6075083017349243, "learning_rate": 2.873241379310345e-05, "loss": 0.207, "step": 10012 }, { "epoch": 34.52758620689655, "grad_norm": 0.8256717324256897, "learning_rate": 2.8731954022988504e-05, "loss": 0.2373, "step": 10013 }, { "epoch": 34.53103448275862, "grad_norm": 0.9862198829650879, "learning_rate": 2.8731494252873563e-05, "loss": 0.2076, "step": 10014 }, { "epoch": 34.53448275862069, "grad_norm": 1.1257680654525757, "learning_rate": 2.873103448275862e-05, "loss": 0.1862, "step": 10015 }, { "epoch": 34.53793103448276, "grad_norm": 0.944973349571228, "learning_rate": 2.873057471264368e-05, "loss": 0.2013, "step": 10016 }, { "epoch": 34.54137931034483, "grad_norm": 1.3203409910202026, "learning_rate": 2.8730114942528736e-05, "loss": 0.2011, "step": 10017 }, { "epoch": 34.5448275862069, "grad_norm": 0.7248476147651672, "learning_rate": 2.872965517241379e-05, "loss": 0.2138, "step": 10018 }, { "epoch": 34.54827586206896, "grad_norm": 1.300368070602417, "learning_rate": 2.8729195402298853e-05, "loss": 0.1682, "step": 10019 }, { "epoch": 34.55172413793103, "grad_norm": 0.656367838382721, "learning_rate": 2.8728735632183908e-05, "loss": 0.2023, "step": 10020 }, { "epoch": 34.5551724137931, "grad_norm": 1.7588452100753784, "learning_rate": 2.8728275862068967e-05, "loss": 0.1855, "step": 10021 }, { "epoch": 34.55862068965517, "grad_norm": 0.7077623605728149, "learning_rate": 2.8727816091954022e-05, "loss": 0.1844, "step": 10022 }, { "epoch": 34.56206896551724, "grad_norm": 0.719720184803009, "learning_rate": 2.872735632183908e-05, "loss": 0.1693, "step": 10023 }, { "epoch": 34.56551724137931, "grad_norm": 1.0776017904281616, "learning_rate": 2.872689655172414e-05, "loss": 0.1722, "step": 10024 }, { "epoch": 34.56896551724138, "grad_norm": 0.6149874925613403, "learning_rate": 2.8726436781609195e-05, "loss": 0.18, "step": 10025 }, { "epoch": 34.57241379310345, "grad_norm": 0.577176034450531, "learning_rate": 2.8725977011494254e-05, "loss": 0.1806, "step": 10026 }, { "epoch": 34.57586206896552, "grad_norm": 0.8395623564720154, "learning_rate": 2.8725517241379312e-05, "loss": 0.1596, "step": 10027 }, { "epoch": 34.57931034482758, "grad_norm": 0.6817365884780884, "learning_rate": 2.8725057471264368e-05, "loss": 0.1713, "step": 10028 }, { "epoch": 34.58275862068965, "grad_norm": 0.8787447810173035, "learning_rate": 2.8724597701149426e-05, "loss": 0.1882, "step": 10029 }, { "epoch": 34.58620689655172, "grad_norm": 1.29002845287323, "learning_rate": 2.872413793103448e-05, "loss": 0.1614, "step": 10030 }, { "epoch": 34.58965517241379, "grad_norm": 0.9629698991775513, "learning_rate": 2.8723678160919544e-05, "loss": 0.177, "step": 10031 }, { "epoch": 34.59310344827586, "grad_norm": 1.0744664669036865, "learning_rate": 2.87232183908046e-05, "loss": 0.1782, "step": 10032 }, { "epoch": 34.59655172413793, "grad_norm": 1.834007978439331, "learning_rate": 2.8722758620689654e-05, "loss": 0.1516, "step": 10033 }, { "epoch": 34.6, "grad_norm": 1.3912657499313354, "learning_rate": 2.8722298850574713e-05, "loss": 0.1798, "step": 10034 }, { "epoch": 34.60344827586207, "grad_norm": 1.334637999534607, "learning_rate": 2.872183908045977e-05, "loss": 0.2305, "step": 10035 }, { "epoch": 34.60689655172414, "grad_norm": 0.7629581093788147, "learning_rate": 2.872137931034483e-05, "loss": 0.2728, "step": 10036 }, { "epoch": 34.610344827586204, "grad_norm": 1.1858633756637573, "learning_rate": 2.8720919540229885e-05, "loss": 0.2139, "step": 10037 }, { "epoch": 34.61379310344827, "grad_norm": 0.9996315836906433, "learning_rate": 2.872045977011494e-05, "loss": 0.2119, "step": 10038 }, { "epoch": 34.61724137931034, "grad_norm": 0.4902673363685608, "learning_rate": 2.8720000000000003e-05, "loss": 0.217, "step": 10039 }, { "epoch": 34.62068965517241, "grad_norm": 0.8617376685142517, "learning_rate": 2.8719540229885058e-05, "loss": 0.1941, "step": 10040 }, { "epoch": 34.62413793103448, "grad_norm": 0.7555685639381409, "learning_rate": 2.8719080459770113e-05, "loss": 0.1932, "step": 10041 }, { "epoch": 34.62758620689655, "grad_norm": 0.5660073757171631, "learning_rate": 2.8718620689655172e-05, "loss": 0.2253, "step": 10042 }, { "epoch": 34.63103448275862, "grad_norm": 0.8065065741539001, "learning_rate": 2.871816091954023e-05, "loss": 0.1907, "step": 10043 }, { "epoch": 34.63448275862069, "grad_norm": 0.572404146194458, "learning_rate": 2.871770114942529e-05, "loss": 0.185, "step": 10044 }, { "epoch": 34.63793103448276, "grad_norm": 0.8077570199966431, "learning_rate": 2.8717241379310345e-05, "loss": 0.1968, "step": 10045 }, { "epoch": 34.641379310344824, "grad_norm": 0.5297030210494995, "learning_rate": 2.87167816091954e-05, "loss": 0.2039, "step": 10046 }, { "epoch": 34.644827586206894, "grad_norm": 0.5970950722694397, "learning_rate": 2.8716321839080462e-05, "loss": 0.1855, "step": 10047 }, { "epoch": 34.648275862068964, "grad_norm": 0.5937606692314148, "learning_rate": 2.8715862068965517e-05, "loss": 0.2026, "step": 10048 }, { "epoch": 34.65172413793103, "grad_norm": 0.6518946290016174, "learning_rate": 2.8715402298850576e-05, "loss": 0.1719, "step": 10049 }, { "epoch": 34.6551724137931, "grad_norm": 1.4072115421295166, "learning_rate": 2.871494252873563e-05, "loss": 0.1989, "step": 10050 }, { "epoch": 34.65862068965517, "grad_norm": 0.6327835917472839, "learning_rate": 2.871448275862069e-05, "loss": 0.1554, "step": 10051 }, { "epoch": 34.66206896551724, "grad_norm": 0.8390635848045349, "learning_rate": 2.871402298850575e-05, "loss": 0.1702, "step": 10052 }, { "epoch": 34.66551724137931, "grad_norm": 2.0111851692199707, "learning_rate": 2.8713563218390804e-05, "loss": 0.1723, "step": 10053 }, { "epoch": 34.66896551724138, "grad_norm": 1.1421759128570557, "learning_rate": 2.8713103448275863e-05, "loss": 0.1784, "step": 10054 }, { "epoch": 34.672413793103445, "grad_norm": 0.872897207736969, "learning_rate": 2.871264367816092e-05, "loss": 0.1816, "step": 10055 }, { "epoch": 34.675862068965515, "grad_norm": 0.7261935472488403, "learning_rate": 2.8712183908045977e-05, "loss": 0.1478, "step": 10056 }, { "epoch": 34.679310344827584, "grad_norm": 0.7848922610282898, "learning_rate": 2.8711724137931035e-05, "loss": 0.1844, "step": 10057 }, { "epoch": 34.682758620689654, "grad_norm": 0.73310387134552, "learning_rate": 2.871126436781609e-05, "loss": 0.1408, "step": 10058 }, { "epoch": 34.686206896551724, "grad_norm": 21.692832946777344, "learning_rate": 2.8710804597701153e-05, "loss": 0.1611, "step": 10059 }, { "epoch": 34.689655172413794, "grad_norm": 0.9243953227996826, "learning_rate": 2.8710344827586208e-05, "loss": 0.2662, "step": 10060 }, { "epoch": 34.69310344827586, "grad_norm": 0.6444424986839294, "learning_rate": 2.8709885057471263e-05, "loss": 0.2555, "step": 10061 }, { "epoch": 34.69655172413793, "grad_norm": 0.7605288028717041, "learning_rate": 2.8709425287356322e-05, "loss": 0.2151, "step": 10062 }, { "epoch": 34.7, "grad_norm": 0.7908433675765991, "learning_rate": 2.870896551724138e-05, "loss": 0.2181, "step": 10063 }, { "epoch": 34.703448275862065, "grad_norm": 0.5930444002151489, "learning_rate": 2.870850574712644e-05, "loss": 0.2159, "step": 10064 }, { "epoch": 34.706896551724135, "grad_norm": 0.49576184153556824, "learning_rate": 2.8708045977011495e-05, "loss": 0.1936, "step": 10065 }, { "epoch": 34.710344827586205, "grad_norm": 1.0250011682510376, "learning_rate": 2.870758620689655e-05, "loss": 0.2139, "step": 10066 }, { "epoch": 34.713793103448275, "grad_norm": 0.7345724105834961, "learning_rate": 2.8707126436781612e-05, "loss": 0.2123, "step": 10067 }, { "epoch": 34.717241379310344, "grad_norm": 0.7707287669181824, "learning_rate": 2.8706666666666667e-05, "loss": 0.1957, "step": 10068 }, { "epoch": 34.720689655172414, "grad_norm": 0.8321088552474976, "learning_rate": 2.8706206896551723e-05, "loss": 0.1958, "step": 10069 }, { "epoch": 34.724137931034484, "grad_norm": 0.9802764654159546, "learning_rate": 2.870574712643678e-05, "loss": 0.1904, "step": 10070 }, { "epoch": 34.727586206896554, "grad_norm": 1.0398088693618774, "learning_rate": 2.870528735632184e-05, "loss": 0.1834, "step": 10071 }, { "epoch": 34.73103448275862, "grad_norm": 1.9314639568328857, "learning_rate": 2.87048275862069e-05, "loss": 0.2053, "step": 10072 }, { "epoch": 34.734482758620686, "grad_norm": 0.6818374991416931, "learning_rate": 2.8704367816091954e-05, "loss": 0.2029, "step": 10073 }, { "epoch": 34.737931034482756, "grad_norm": 1.1860889196395874, "learning_rate": 2.870390804597701e-05, "loss": 0.1877, "step": 10074 }, { "epoch": 34.741379310344826, "grad_norm": 0.7595455646514893, "learning_rate": 2.870344827586207e-05, "loss": 0.1883, "step": 10075 }, { "epoch": 34.744827586206895, "grad_norm": 0.5626807808876038, "learning_rate": 2.8702988505747127e-05, "loss": 0.1576, "step": 10076 }, { "epoch": 34.748275862068965, "grad_norm": 1.2064045667648315, "learning_rate": 2.8702528735632185e-05, "loss": 0.1812, "step": 10077 }, { "epoch": 34.751724137931035, "grad_norm": 0.6606716513633728, "learning_rate": 2.870206896551724e-05, "loss": 0.172, "step": 10078 }, { "epoch": 34.755172413793105, "grad_norm": 0.6777728796005249, "learning_rate": 2.87016091954023e-05, "loss": 0.1631, "step": 10079 }, { "epoch": 34.758620689655174, "grad_norm": 1.329119324684143, "learning_rate": 2.8701149425287358e-05, "loss": 0.1696, "step": 10080 }, { "epoch": 34.762068965517244, "grad_norm": 0.7151396870613098, "learning_rate": 2.8700689655172413e-05, "loss": 0.1584, "step": 10081 }, { "epoch": 34.765517241379314, "grad_norm": 0.6792296767234802, "learning_rate": 2.8700229885057472e-05, "loss": 0.17, "step": 10082 }, { "epoch": 34.76896551724138, "grad_norm": 0.857546865940094, "learning_rate": 2.869977011494253e-05, "loss": 0.1683, "step": 10083 }, { "epoch": 34.772413793103446, "grad_norm": 1.2686654329299927, "learning_rate": 2.8699310344827586e-05, "loss": 0.1928, "step": 10084 }, { "epoch": 34.775862068965516, "grad_norm": 1.2945857048034668, "learning_rate": 2.8698850574712645e-05, "loss": 0.222, "step": 10085 }, { "epoch": 34.779310344827586, "grad_norm": 0.7951591610908508, "learning_rate": 2.86983908045977e-05, "loss": 0.2713, "step": 10086 }, { "epoch": 34.782758620689656, "grad_norm": 0.6739210486412048, "learning_rate": 2.8697931034482762e-05, "loss": 0.2307, "step": 10087 }, { "epoch": 34.786206896551725, "grad_norm": 0.8018574714660645, "learning_rate": 2.8697471264367817e-05, "loss": 0.1979, "step": 10088 }, { "epoch": 34.789655172413795, "grad_norm": 0.6053676605224609, "learning_rate": 2.8697011494252873e-05, "loss": 0.2034, "step": 10089 }, { "epoch": 34.793103448275865, "grad_norm": 0.5448389649391174, "learning_rate": 2.869655172413793e-05, "loss": 0.2106, "step": 10090 }, { "epoch": 34.796551724137935, "grad_norm": 0.6881526708602905, "learning_rate": 2.869609195402299e-05, "loss": 0.2033, "step": 10091 }, { "epoch": 34.8, "grad_norm": 0.8946785926818848, "learning_rate": 2.869563218390805e-05, "loss": 0.2, "step": 10092 }, { "epoch": 34.80344827586207, "grad_norm": 0.7963598966598511, "learning_rate": 2.8695172413793104e-05, "loss": 0.1891, "step": 10093 }, { "epoch": 34.80689655172414, "grad_norm": 0.8014782071113586, "learning_rate": 2.869471264367816e-05, "loss": 0.1846, "step": 10094 }, { "epoch": 34.810344827586206, "grad_norm": 0.6565030813217163, "learning_rate": 2.869425287356322e-05, "loss": 0.1789, "step": 10095 }, { "epoch": 34.813793103448276, "grad_norm": 1.5708668231964111, "learning_rate": 2.8693793103448277e-05, "loss": 0.1799, "step": 10096 }, { "epoch": 34.817241379310346, "grad_norm": 0.48420876264572144, "learning_rate": 2.8693333333333335e-05, "loss": 0.1726, "step": 10097 }, { "epoch": 34.820689655172416, "grad_norm": 0.8854993581771851, "learning_rate": 2.869287356321839e-05, "loss": 0.1806, "step": 10098 }, { "epoch": 34.824137931034485, "grad_norm": 1.0368969440460205, "learning_rate": 2.869241379310345e-05, "loss": 0.1623, "step": 10099 }, { "epoch": 34.827586206896555, "grad_norm": 0.9527176022529602, "learning_rate": 2.8691954022988508e-05, "loss": 0.2022, "step": 10100 }, { "epoch": 34.83103448275862, "grad_norm": 0.8154616951942444, "learning_rate": 2.8691494252873563e-05, "loss": 0.1634, "step": 10101 }, { "epoch": 34.83448275862069, "grad_norm": 0.6441482305526733, "learning_rate": 2.869103448275862e-05, "loss": 0.1501, "step": 10102 }, { "epoch": 34.83793103448276, "grad_norm": 0.7318999767303467, "learning_rate": 2.869057471264368e-05, "loss": 0.1658, "step": 10103 }, { "epoch": 34.84137931034483, "grad_norm": 1.1720696687698364, "learning_rate": 2.8690114942528736e-05, "loss": 0.1698, "step": 10104 }, { "epoch": 34.8448275862069, "grad_norm": 1.299375057220459, "learning_rate": 2.8689655172413795e-05, "loss": 0.1441, "step": 10105 }, { "epoch": 34.84827586206897, "grad_norm": 0.8736996054649353, "learning_rate": 2.868919540229885e-05, "loss": 0.1764, "step": 10106 }, { "epoch": 34.851724137931036, "grad_norm": 1.2081992626190186, "learning_rate": 2.868873563218391e-05, "loss": 0.1707, "step": 10107 }, { "epoch": 34.855172413793106, "grad_norm": 0.8058651685714722, "learning_rate": 2.8688275862068967e-05, "loss": 0.1777, "step": 10108 }, { "epoch": 34.858620689655176, "grad_norm": 7.116538047790527, "learning_rate": 2.8687816091954023e-05, "loss": 0.1576, "step": 10109 }, { "epoch": 34.86206896551724, "grad_norm": 1.3565130233764648, "learning_rate": 2.868735632183908e-05, "loss": 0.2051, "step": 10110 }, { "epoch": 34.86551724137931, "grad_norm": 0.8830981850624084, "learning_rate": 2.868689655172414e-05, "loss": 0.265, "step": 10111 }, { "epoch": 34.86896551724138, "grad_norm": 0.8869468569755554, "learning_rate": 2.8686436781609195e-05, "loss": 0.2406, "step": 10112 }, { "epoch": 34.87241379310345, "grad_norm": 0.8635165095329285, "learning_rate": 2.8685977011494254e-05, "loss": 0.2357, "step": 10113 }, { "epoch": 34.87586206896552, "grad_norm": 0.5215117335319519, "learning_rate": 2.868551724137931e-05, "loss": 0.2155, "step": 10114 }, { "epoch": 34.87931034482759, "grad_norm": 0.5928959846496582, "learning_rate": 2.868505747126437e-05, "loss": 0.2278, "step": 10115 }, { "epoch": 34.88275862068966, "grad_norm": 0.5451503992080688, "learning_rate": 2.8684597701149427e-05, "loss": 0.22, "step": 10116 }, { "epoch": 34.88620689655173, "grad_norm": 0.8722317814826965, "learning_rate": 2.8684137931034482e-05, "loss": 0.1937, "step": 10117 }, { "epoch": 34.889655172413796, "grad_norm": 0.7911489605903625, "learning_rate": 2.868367816091954e-05, "loss": 0.2306, "step": 10118 }, { "epoch": 34.89310344827586, "grad_norm": 1.4006762504577637, "learning_rate": 2.86832183908046e-05, "loss": 0.2081, "step": 10119 }, { "epoch": 34.89655172413793, "grad_norm": 1.0606504678726196, "learning_rate": 2.8682758620689658e-05, "loss": 0.1821, "step": 10120 }, { "epoch": 34.9, "grad_norm": 1.8757413625717163, "learning_rate": 2.8682298850574713e-05, "loss": 0.1927, "step": 10121 }, { "epoch": 34.90344827586207, "grad_norm": 2.135897636413574, "learning_rate": 2.868183908045977e-05, "loss": 0.2018, "step": 10122 }, { "epoch": 34.90689655172414, "grad_norm": 1.7178521156311035, "learning_rate": 2.868137931034483e-05, "loss": 0.1828, "step": 10123 }, { "epoch": 34.91034482758621, "grad_norm": 0.6530880928039551, "learning_rate": 2.8680919540229886e-05, "loss": 0.1901, "step": 10124 }, { "epoch": 34.91379310344828, "grad_norm": 0.6263763308525085, "learning_rate": 2.8680459770114945e-05, "loss": 0.1705, "step": 10125 }, { "epoch": 34.91724137931035, "grad_norm": 1.128545880317688, "learning_rate": 2.868e-05, "loss": 0.1771, "step": 10126 }, { "epoch": 34.92068965517242, "grad_norm": 1.1017367839813232, "learning_rate": 2.867954022988506e-05, "loss": 0.182, "step": 10127 }, { "epoch": 34.92413793103448, "grad_norm": 1.3366315364837646, "learning_rate": 2.8679080459770117e-05, "loss": 0.1756, "step": 10128 }, { "epoch": 34.92758620689655, "grad_norm": 0.5983278751373291, "learning_rate": 2.8678620689655173e-05, "loss": 0.1613, "step": 10129 }, { "epoch": 34.93103448275862, "grad_norm": 0.7219371795654297, "learning_rate": 2.8678160919540228e-05, "loss": 0.1752, "step": 10130 }, { "epoch": 34.93448275862069, "grad_norm": 1.107809066772461, "learning_rate": 2.867770114942529e-05, "loss": 0.1612, "step": 10131 }, { "epoch": 34.93793103448276, "grad_norm": 1.1848219633102417, "learning_rate": 2.8677241379310345e-05, "loss": 0.1645, "step": 10132 }, { "epoch": 34.94137931034483, "grad_norm": 0.8319786190986633, "learning_rate": 2.8676781609195404e-05, "loss": 0.1626, "step": 10133 }, { "epoch": 34.9448275862069, "grad_norm": 1.1478711366653442, "learning_rate": 2.867632183908046e-05, "loss": 0.2049, "step": 10134 }, { "epoch": 34.94827586206897, "grad_norm": 1.3294427394866943, "learning_rate": 2.8675862068965518e-05, "loss": 0.2341, "step": 10135 }, { "epoch": 34.95172413793104, "grad_norm": 0.7287530899047852, "learning_rate": 2.8675402298850577e-05, "loss": 0.2406, "step": 10136 }, { "epoch": 34.9551724137931, "grad_norm": 0.7058027982711792, "learning_rate": 2.8674942528735632e-05, "loss": 0.2072, "step": 10137 }, { "epoch": 34.95862068965517, "grad_norm": 0.8550015687942505, "learning_rate": 2.867448275862069e-05, "loss": 0.2144, "step": 10138 }, { "epoch": 34.96206896551724, "grad_norm": 0.8025185465812683, "learning_rate": 2.867402298850575e-05, "loss": 0.2155, "step": 10139 }, { "epoch": 34.96551724137931, "grad_norm": 1.4470614194869995, "learning_rate": 2.8673563218390804e-05, "loss": 0.1958, "step": 10140 }, { "epoch": 34.96896551724138, "grad_norm": 0.582530677318573, "learning_rate": 2.8673103448275863e-05, "loss": 0.204, "step": 10141 }, { "epoch": 34.97241379310345, "grad_norm": 0.8539747595787048, "learning_rate": 2.867264367816092e-05, "loss": 0.1742, "step": 10142 }, { "epoch": 34.97586206896552, "grad_norm": 0.5841432809829712, "learning_rate": 2.867218390804598e-05, "loss": 0.1714, "step": 10143 }, { "epoch": 34.97931034482759, "grad_norm": 0.6492300629615784, "learning_rate": 2.8671724137931036e-05, "loss": 0.1763, "step": 10144 }, { "epoch": 34.98275862068966, "grad_norm": 1.1607105731964111, "learning_rate": 2.867126436781609e-05, "loss": 0.1762, "step": 10145 }, { "epoch": 34.98620689655172, "grad_norm": 0.6854648590087891, "learning_rate": 2.867080459770115e-05, "loss": 0.174, "step": 10146 }, { "epoch": 34.98965517241379, "grad_norm": 1.178310751914978, "learning_rate": 2.867034482758621e-05, "loss": 0.1563, "step": 10147 }, { "epoch": 34.99310344827586, "grad_norm": 0.6277459263801575, "learning_rate": 2.8669885057471267e-05, "loss": 0.1561, "step": 10148 }, { "epoch": 34.99655172413793, "grad_norm": 1.3239656686782837, "learning_rate": 2.8669425287356322e-05, "loss": 0.1391, "step": 10149 }, { "epoch": 35.0, "grad_norm": 1.1460143327713013, "learning_rate": 2.8668965517241378e-05, "loss": 0.2633, "step": 10150 }, { "epoch": 35.00344827586207, "grad_norm": 0.6012634634971619, "learning_rate": 2.866850574712644e-05, "loss": 0.2496, "step": 10151 }, { "epoch": 35.00689655172414, "grad_norm": 0.5093791484832764, "learning_rate": 2.8668045977011495e-05, "loss": 0.2032, "step": 10152 }, { "epoch": 35.01034482758621, "grad_norm": 0.5755361914634705, "learning_rate": 2.8667586206896554e-05, "loss": 0.2008, "step": 10153 }, { "epoch": 35.01379310344828, "grad_norm": 0.7438570857048035, "learning_rate": 2.866712643678161e-05, "loss": 0.1907, "step": 10154 }, { "epoch": 35.01724137931034, "grad_norm": 1.2386488914489746, "learning_rate": 2.8666666666666668e-05, "loss": 0.2076, "step": 10155 }, { "epoch": 35.02068965517241, "grad_norm": 0.725104808807373, "learning_rate": 2.8666206896551726e-05, "loss": 0.1814, "step": 10156 }, { "epoch": 35.02413793103448, "grad_norm": 1.4488894939422607, "learning_rate": 2.8665747126436782e-05, "loss": 0.1996, "step": 10157 }, { "epoch": 35.02758620689655, "grad_norm": 0.49094685912132263, "learning_rate": 2.8665287356321837e-05, "loss": 0.1699, "step": 10158 }, { "epoch": 35.03103448275862, "grad_norm": 1.188096523284912, "learning_rate": 2.86648275862069e-05, "loss": 0.1808, "step": 10159 }, { "epoch": 35.03448275862069, "grad_norm": 0.7098057866096497, "learning_rate": 2.8664367816091954e-05, "loss": 0.1701, "step": 10160 }, { "epoch": 35.03793103448276, "grad_norm": 0.7821598649024963, "learning_rate": 2.8663908045977013e-05, "loss": 0.1677, "step": 10161 }, { "epoch": 35.04137931034483, "grad_norm": 0.5614485144615173, "learning_rate": 2.866344827586207e-05, "loss": 0.1652, "step": 10162 }, { "epoch": 35.0448275862069, "grad_norm": 0.6360791921615601, "learning_rate": 2.8662988505747127e-05, "loss": 0.1612, "step": 10163 }, { "epoch": 35.04827586206896, "grad_norm": 0.6944830417633057, "learning_rate": 2.8662528735632186e-05, "loss": 0.1808, "step": 10164 }, { "epoch": 35.05172413793103, "grad_norm": 0.8126320242881775, "learning_rate": 2.866206896551724e-05, "loss": 0.1909, "step": 10165 }, { "epoch": 35.0551724137931, "grad_norm": 0.8010106682777405, "learning_rate": 2.86616091954023e-05, "loss": 0.1482, "step": 10166 }, { "epoch": 35.05862068965517, "grad_norm": 0.5952246189117432, "learning_rate": 2.866114942528736e-05, "loss": 0.1434, "step": 10167 }, { "epoch": 35.06206896551724, "grad_norm": 2.1549787521362305, "learning_rate": 2.8660689655172414e-05, "loss": 0.1518, "step": 10168 }, { "epoch": 35.06551724137931, "grad_norm": 0.6325023174285889, "learning_rate": 2.8660229885057472e-05, "loss": 0.125, "step": 10169 }, { "epoch": 35.06896551724138, "grad_norm": 0.6169240474700928, "learning_rate": 2.8659770114942528e-05, "loss": 0.1501, "step": 10170 }, { "epoch": 35.07241379310345, "grad_norm": 0.9925827383995056, "learning_rate": 2.865931034482759e-05, "loss": 0.1703, "step": 10171 }, { "epoch": 35.07586206896552, "grad_norm": 0.787509024143219, "learning_rate": 2.8658850574712645e-05, "loss": 0.149, "step": 10172 }, { "epoch": 35.07931034482758, "grad_norm": 1.0452619791030884, "learning_rate": 2.86583908045977e-05, "loss": 0.146, "step": 10173 }, { "epoch": 35.08275862068965, "grad_norm": 0.8586390018463135, "learning_rate": 2.865793103448276e-05, "loss": 0.1408, "step": 10174 }, { "epoch": 35.08620689655172, "grad_norm": 2.9236958026885986, "learning_rate": 2.8657471264367818e-05, "loss": 0.1892, "step": 10175 }, { "epoch": 35.08965517241379, "grad_norm": 0.5775632858276367, "learning_rate": 2.8657011494252876e-05, "loss": 0.2367, "step": 10176 }, { "epoch": 35.09310344827586, "grad_norm": 0.6513581871986389, "learning_rate": 2.865655172413793e-05, "loss": 0.234, "step": 10177 }, { "epoch": 35.09655172413793, "grad_norm": 0.5955702066421509, "learning_rate": 2.8656091954022987e-05, "loss": 0.194, "step": 10178 }, { "epoch": 35.1, "grad_norm": 0.8965678215026855, "learning_rate": 2.865563218390805e-05, "loss": 0.235, "step": 10179 }, { "epoch": 35.10344827586207, "grad_norm": 0.8585269451141357, "learning_rate": 2.8655172413793104e-05, "loss": 0.2118, "step": 10180 }, { "epoch": 35.10689655172414, "grad_norm": 0.8252213597297668, "learning_rate": 2.8654712643678163e-05, "loss": 0.1849, "step": 10181 }, { "epoch": 35.110344827586204, "grad_norm": 0.6701560020446777, "learning_rate": 2.865425287356322e-05, "loss": 0.1947, "step": 10182 }, { "epoch": 35.11379310344827, "grad_norm": 0.5394258499145508, "learning_rate": 2.8653793103448277e-05, "loss": 0.1832, "step": 10183 }, { "epoch": 35.11724137931034, "grad_norm": 0.606543242931366, "learning_rate": 2.8653333333333336e-05, "loss": 0.1932, "step": 10184 }, { "epoch": 35.12068965517241, "grad_norm": 0.4367949962615967, "learning_rate": 2.865287356321839e-05, "loss": 0.1602, "step": 10185 }, { "epoch": 35.12413793103448, "grad_norm": 0.9940876960754395, "learning_rate": 2.865241379310345e-05, "loss": 0.2054, "step": 10186 }, { "epoch": 35.12758620689655, "grad_norm": 1.2454071044921875, "learning_rate": 2.865195402298851e-05, "loss": 0.1743, "step": 10187 }, { "epoch": 35.13103448275862, "grad_norm": 0.8966359496116638, "learning_rate": 2.8651494252873564e-05, "loss": 0.1681, "step": 10188 }, { "epoch": 35.13448275862069, "grad_norm": 0.8748435378074646, "learning_rate": 2.8651034482758622e-05, "loss": 0.1549, "step": 10189 }, { "epoch": 35.13793103448276, "grad_norm": 0.6925779581069946, "learning_rate": 2.8650574712643678e-05, "loss": 0.1509, "step": 10190 }, { "epoch": 35.141379310344824, "grad_norm": 0.867839515209198, "learning_rate": 2.8650114942528736e-05, "loss": 0.1579, "step": 10191 }, { "epoch": 35.144827586206894, "grad_norm": 0.8655818700790405, "learning_rate": 2.8649655172413795e-05, "loss": 0.1483, "step": 10192 }, { "epoch": 35.148275862068964, "grad_norm": 0.9205208420753479, "learning_rate": 2.864919540229885e-05, "loss": 0.1696, "step": 10193 }, { "epoch": 35.15172413793103, "grad_norm": 0.9168076515197754, "learning_rate": 2.864873563218391e-05, "loss": 0.1532, "step": 10194 }, { "epoch": 35.1551724137931, "grad_norm": 0.7449065446853638, "learning_rate": 2.8648275862068968e-05, "loss": 0.1515, "step": 10195 }, { "epoch": 35.15862068965517, "grad_norm": 0.7067936658859253, "learning_rate": 2.8647816091954023e-05, "loss": 0.1677, "step": 10196 }, { "epoch": 35.16206896551724, "grad_norm": 0.8781799077987671, "learning_rate": 2.864735632183908e-05, "loss": 0.1477, "step": 10197 }, { "epoch": 35.16551724137931, "grad_norm": 0.5904472470283508, "learning_rate": 2.8646896551724137e-05, "loss": 0.1464, "step": 10198 }, { "epoch": 35.16896551724138, "grad_norm": 1.753385305404663, "learning_rate": 2.86464367816092e-05, "loss": 0.1622, "step": 10199 }, { "epoch": 35.172413793103445, "grad_norm": 1.81578528881073, "learning_rate": 2.8645977011494254e-05, "loss": 0.1987, "step": 10200 }, { "epoch": 35.175862068965515, "grad_norm": 0.6288548111915588, "learning_rate": 2.864551724137931e-05, "loss": 0.2632, "step": 10201 }, { "epoch": 35.179310344827584, "grad_norm": 0.7151146531105042, "learning_rate": 2.8645057471264368e-05, "loss": 0.1998, "step": 10202 }, { "epoch": 35.182758620689654, "grad_norm": 0.5560855269432068, "learning_rate": 2.8644597701149427e-05, "loss": 0.1841, "step": 10203 }, { "epoch": 35.186206896551724, "grad_norm": 1.0133695602416992, "learning_rate": 2.8644137931034486e-05, "loss": 0.2001, "step": 10204 }, { "epoch": 35.189655172413794, "grad_norm": 0.47503146529197693, "learning_rate": 2.864367816091954e-05, "loss": 0.1864, "step": 10205 }, { "epoch": 35.19310344827586, "grad_norm": 0.7081072330474854, "learning_rate": 2.8643218390804596e-05, "loss": 0.1927, "step": 10206 }, { "epoch": 35.19655172413793, "grad_norm": 0.7162263989448547, "learning_rate": 2.8642758620689658e-05, "loss": 0.1928, "step": 10207 }, { "epoch": 35.2, "grad_norm": 0.6410690546035767, "learning_rate": 2.8642298850574714e-05, "loss": 0.1984, "step": 10208 }, { "epoch": 35.203448275862065, "grad_norm": 0.6217648386955261, "learning_rate": 2.8641839080459772e-05, "loss": 0.1789, "step": 10209 }, { "epoch": 35.206896551724135, "grad_norm": 0.6525776982307434, "learning_rate": 2.8641379310344828e-05, "loss": 0.1757, "step": 10210 }, { "epoch": 35.210344827586205, "grad_norm": 0.9373620748519897, "learning_rate": 2.8640919540229886e-05, "loss": 0.1803, "step": 10211 }, { "epoch": 35.213793103448275, "grad_norm": 0.5778806209564209, "learning_rate": 2.8640459770114945e-05, "loss": 0.1819, "step": 10212 }, { "epoch": 35.217241379310344, "grad_norm": 0.9302212595939636, "learning_rate": 2.864e-05, "loss": 0.1814, "step": 10213 }, { "epoch": 35.220689655172414, "grad_norm": 1.063830018043518, "learning_rate": 2.863954022988506e-05, "loss": 0.1532, "step": 10214 }, { "epoch": 35.224137931034484, "grad_norm": 1.1861730813980103, "learning_rate": 2.8639080459770118e-05, "loss": 0.1741, "step": 10215 }, { "epoch": 35.227586206896554, "grad_norm": 1.6015124320983887, "learning_rate": 2.8638620689655173e-05, "loss": 0.1573, "step": 10216 }, { "epoch": 35.23103448275862, "grad_norm": 0.6127440929412842, "learning_rate": 2.863816091954023e-05, "loss": 0.1367, "step": 10217 }, { "epoch": 35.234482758620686, "grad_norm": 2.779848575592041, "learning_rate": 2.8637701149425287e-05, "loss": 0.1456, "step": 10218 }, { "epoch": 35.237931034482756, "grad_norm": 1.016244649887085, "learning_rate": 2.8637241379310346e-05, "loss": 0.1587, "step": 10219 }, { "epoch": 35.241379310344826, "grad_norm": 0.7303193211555481, "learning_rate": 2.8636781609195404e-05, "loss": 0.1558, "step": 10220 }, { "epoch": 35.244827586206895, "grad_norm": 1.3593546152114868, "learning_rate": 2.863632183908046e-05, "loss": 0.1817, "step": 10221 }, { "epoch": 35.248275862068965, "grad_norm": 0.6590874791145325, "learning_rate": 2.8635862068965518e-05, "loss": 0.144, "step": 10222 }, { "epoch": 35.251724137931035, "grad_norm": 1.0326639413833618, "learning_rate": 2.8635402298850577e-05, "loss": 0.175, "step": 10223 }, { "epoch": 35.255172413793105, "grad_norm": 0.8622634410858154, "learning_rate": 2.8634942528735632e-05, "loss": 0.1426, "step": 10224 }, { "epoch": 35.258620689655174, "grad_norm": 0.9926106929779053, "learning_rate": 2.863448275862069e-05, "loss": 0.1913, "step": 10225 }, { "epoch": 35.262068965517244, "grad_norm": 0.6345683336257935, "learning_rate": 2.8634022988505746e-05, "loss": 0.2721, "step": 10226 }, { "epoch": 35.265517241379314, "grad_norm": 0.7665757536888123, "learning_rate": 2.8633563218390808e-05, "loss": 0.2054, "step": 10227 }, { "epoch": 35.26896551724138, "grad_norm": 0.9437811970710754, "learning_rate": 2.8633103448275864e-05, "loss": 0.2011, "step": 10228 }, { "epoch": 35.272413793103446, "grad_norm": 1.0289161205291748, "learning_rate": 2.863264367816092e-05, "loss": 0.2091, "step": 10229 }, { "epoch": 35.275862068965516, "grad_norm": 1.5504070520401, "learning_rate": 2.8632183908045978e-05, "loss": 0.1872, "step": 10230 }, { "epoch": 35.279310344827586, "grad_norm": 0.7760083675384521, "learning_rate": 2.8631724137931036e-05, "loss": 0.1898, "step": 10231 }, { "epoch": 35.282758620689656, "grad_norm": 0.8630239963531494, "learning_rate": 2.8631264367816095e-05, "loss": 0.238, "step": 10232 }, { "epoch": 35.286206896551725, "grad_norm": 0.5736117959022522, "learning_rate": 2.863080459770115e-05, "loss": 0.1835, "step": 10233 }, { "epoch": 35.289655172413795, "grad_norm": 0.6406168937683105, "learning_rate": 2.8630344827586205e-05, "loss": 0.1961, "step": 10234 }, { "epoch": 35.293103448275865, "grad_norm": 0.8738698363304138, "learning_rate": 2.8629885057471268e-05, "loss": 0.1941, "step": 10235 }, { "epoch": 35.296551724137935, "grad_norm": 1.150514841079712, "learning_rate": 2.8629425287356323e-05, "loss": 0.1963, "step": 10236 }, { "epoch": 35.3, "grad_norm": 1.141127586364746, "learning_rate": 2.862896551724138e-05, "loss": 0.1932, "step": 10237 }, { "epoch": 35.30344827586207, "grad_norm": 0.47927024960517883, "learning_rate": 2.8628505747126437e-05, "loss": 0.1512, "step": 10238 }, { "epoch": 35.30689655172414, "grad_norm": 0.5424816012382507, "learning_rate": 2.8628045977011495e-05, "loss": 0.1702, "step": 10239 }, { "epoch": 35.310344827586206, "grad_norm": 0.6939734816551208, "learning_rate": 2.8627586206896554e-05, "loss": 0.1655, "step": 10240 }, { "epoch": 35.313793103448276, "grad_norm": 0.7115908265113831, "learning_rate": 2.862712643678161e-05, "loss": 0.1715, "step": 10241 }, { "epoch": 35.317241379310346, "grad_norm": 0.8849985003471375, "learning_rate": 2.8626666666666668e-05, "loss": 0.1875, "step": 10242 }, { "epoch": 35.320689655172416, "grad_norm": 0.7074676156044006, "learning_rate": 2.8626206896551727e-05, "loss": 0.1789, "step": 10243 }, { "epoch": 35.324137931034485, "grad_norm": 0.6603622436523438, "learning_rate": 2.8625747126436782e-05, "loss": 0.1603, "step": 10244 }, { "epoch": 35.327586206896555, "grad_norm": 0.7760853171348572, "learning_rate": 2.862528735632184e-05, "loss": 0.1446, "step": 10245 }, { "epoch": 35.33103448275862, "grad_norm": 0.9509055614471436, "learning_rate": 2.8624827586206896e-05, "loss": 0.1337, "step": 10246 }, { "epoch": 35.33448275862069, "grad_norm": 0.9376936554908752, "learning_rate": 2.8624367816091955e-05, "loss": 0.1471, "step": 10247 }, { "epoch": 35.33793103448276, "grad_norm": 1.104967713356018, "learning_rate": 2.8623908045977013e-05, "loss": 0.171, "step": 10248 }, { "epoch": 35.34137931034483, "grad_norm": 0.9493774175643921, "learning_rate": 2.862344827586207e-05, "loss": 0.1245, "step": 10249 }, { "epoch": 35.3448275862069, "grad_norm": 1.1670328378677368, "learning_rate": 2.8622988505747127e-05, "loss": 0.2042, "step": 10250 }, { "epoch": 35.34827586206897, "grad_norm": 0.7861425280570984, "learning_rate": 2.8622528735632183e-05, "loss": 0.2298, "step": 10251 }, { "epoch": 35.351724137931036, "grad_norm": 0.7824392318725586, "learning_rate": 2.862206896551724e-05, "loss": 0.2186, "step": 10252 }, { "epoch": 35.355172413793106, "grad_norm": 0.6867098212242126, "learning_rate": 2.86216091954023e-05, "loss": 0.2065, "step": 10253 }, { "epoch": 35.358620689655176, "grad_norm": 0.5858350992202759, "learning_rate": 2.8621149425287355e-05, "loss": 0.2169, "step": 10254 }, { "epoch": 35.36206896551724, "grad_norm": 1.157880187034607, "learning_rate": 2.8620689655172414e-05, "loss": 0.1791, "step": 10255 }, { "epoch": 35.36551724137931, "grad_norm": 0.7901200652122498, "learning_rate": 2.8620229885057473e-05, "loss": 0.1902, "step": 10256 }, { "epoch": 35.36896551724138, "grad_norm": 0.6942179799079895, "learning_rate": 2.8619770114942528e-05, "loss": 0.1941, "step": 10257 }, { "epoch": 35.37241379310345, "grad_norm": 0.6752281188964844, "learning_rate": 2.8619310344827587e-05, "loss": 0.1805, "step": 10258 }, { "epoch": 35.37586206896552, "grad_norm": 1.0135208368301392, "learning_rate": 2.8618850574712642e-05, "loss": 0.1981, "step": 10259 }, { "epoch": 35.37931034482759, "grad_norm": 0.4873535931110382, "learning_rate": 2.8618390804597704e-05, "loss": 0.1649, "step": 10260 }, { "epoch": 35.38275862068966, "grad_norm": 0.800179660320282, "learning_rate": 2.861793103448276e-05, "loss": 0.1623, "step": 10261 }, { "epoch": 35.38620689655173, "grad_norm": 0.9838978052139282, "learning_rate": 2.8617471264367815e-05, "loss": 0.1866, "step": 10262 }, { "epoch": 35.389655172413796, "grad_norm": 0.7816822528839111, "learning_rate": 2.8617011494252873e-05, "loss": 0.1817, "step": 10263 }, { "epoch": 35.39310344827586, "grad_norm": 0.5563119649887085, "learning_rate": 2.8616551724137932e-05, "loss": 0.174, "step": 10264 }, { "epoch": 35.39655172413793, "grad_norm": 0.7493913769721985, "learning_rate": 2.861609195402299e-05, "loss": 0.1853, "step": 10265 }, { "epoch": 35.4, "grad_norm": 0.9673469662666321, "learning_rate": 2.8615632183908046e-05, "loss": 0.1657, "step": 10266 }, { "epoch": 35.40344827586207, "grad_norm": 0.5708771347999573, "learning_rate": 2.86151724137931e-05, "loss": 0.1713, "step": 10267 }, { "epoch": 35.40689655172414, "grad_norm": 1.021803617477417, "learning_rate": 2.8614712643678163e-05, "loss": 0.1492, "step": 10268 }, { "epoch": 35.41034482758621, "grad_norm": 1.5312862396240234, "learning_rate": 2.861425287356322e-05, "loss": 0.1453, "step": 10269 }, { "epoch": 35.41379310344828, "grad_norm": 0.9435055255889893, "learning_rate": 2.8613793103448277e-05, "loss": 0.1651, "step": 10270 }, { "epoch": 35.41724137931035, "grad_norm": 1.6760451793670654, "learning_rate": 2.8613333333333333e-05, "loss": 0.1634, "step": 10271 }, { "epoch": 35.42068965517242, "grad_norm": 1.0028076171875, "learning_rate": 2.861287356321839e-05, "loss": 0.1461, "step": 10272 }, { "epoch": 35.42413793103448, "grad_norm": 0.9456700682640076, "learning_rate": 2.861241379310345e-05, "loss": 0.1456, "step": 10273 }, { "epoch": 35.42758620689655, "grad_norm": 1.437163233757019, "learning_rate": 2.8611954022988505e-05, "loss": 0.1497, "step": 10274 }, { "epoch": 35.43103448275862, "grad_norm": 0.9539439082145691, "learning_rate": 2.8611494252873564e-05, "loss": 0.1892, "step": 10275 }, { "epoch": 35.43448275862069, "grad_norm": 1.0725033283233643, "learning_rate": 2.8611034482758623e-05, "loss": 0.2701, "step": 10276 }, { "epoch": 35.43793103448276, "grad_norm": 0.7199802398681641, "learning_rate": 2.8610574712643678e-05, "loss": 0.2082, "step": 10277 }, { "epoch": 35.44137931034483, "grad_norm": 0.8559750914573669, "learning_rate": 2.8610114942528737e-05, "loss": 0.2069, "step": 10278 }, { "epoch": 35.4448275862069, "grad_norm": 0.5851635336875916, "learning_rate": 2.8609655172413792e-05, "loss": 0.1923, "step": 10279 }, { "epoch": 35.44827586206897, "grad_norm": 0.7483780980110168, "learning_rate": 2.860919540229885e-05, "loss": 0.214, "step": 10280 }, { "epoch": 35.45172413793104, "grad_norm": 0.9202209115028381, "learning_rate": 2.860873563218391e-05, "loss": 0.2229, "step": 10281 }, { "epoch": 35.4551724137931, "grad_norm": 1.2151525020599365, "learning_rate": 2.8608275862068965e-05, "loss": 0.201, "step": 10282 }, { "epoch": 35.45862068965517, "grad_norm": 0.7238057851791382, "learning_rate": 2.8607816091954023e-05, "loss": 0.2074, "step": 10283 }, { "epoch": 35.46206896551724, "grad_norm": 0.6211451888084412, "learning_rate": 2.8607356321839082e-05, "loss": 0.178, "step": 10284 }, { "epoch": 35.46551724137931, "grad_norm": 1.0408005714416504, "learning_rate": 2.8606896551724137e-05, "loss": 0.1817, "step": 10285 }, { "epoch": 35.46896551724138, "grad_norm": 1.8259632587432861, "learning_rate": 2.8606436781609196e-05, "loss": 0.1804, "step": 10286 }, { "epoch": 35.47241379310345, "grad_norm": 0.5324728488922119, "learning_rate": 2.860597701149425e-05, "loss": 0.178, "step": 10287 }, { "epoch": 35.47586206896552, "grad_norm": 0.8314831852912903, "learning_rate": 2.8605517241379313e-05, "loss": 0.1611, "step": 10288 }, { "epoch": 35.47931034482759, "grad_norm": 0.5396053791046143, "learning_rate": 2.860505747126437e-05, "loss": 0.1868, "step": 10289 }, { "epoch": 35.48275862068966, "grad_norm": 0.9740960597991943, "learning_rate": 2.8604597701149424e-05, "loss": 0.2051, "step": 10290 }, { "epoch": 35.48620689655172, "grad_norm": 0.6642908453941345, "learning_rate": 2.8604137931034483e-05, "loss": 0.1611, "step": 10291 }, { "epoch": 35.48965517241379, "grad_norm": 0.7185412049293518, "learning_rate": 2.860367816091954e-05, "loss": 0.1749, "step": 10292 }, { "epoch": 35.49310344827586, "grad_norm": 0.5960447192192078, "learning_rate": 2.86032183908046e-05, "loss": 0.1418, "step": 10293 }, { "epoch": 35.49655172413793, "grad_norm": 0.9124602675437927, "learning_rate": 2.8602758620689655e-05, "loss": 0.1423, "step": 10294 }, { "epoch": 35.5, "grad_norm": 0.9295604228973389, "learning_rate": 2.860229885057471e-05, "loss": 0.1531, "step": 10295 }, { "epoch": 35.50344827586207, "grad_norm": 0.7784161567687988, "learning_rate": 2.8601839080459773e-05, "loss": 0.1621, "step": 10296 }, { "epoch": 35.50689655172414, "grad_norm": 1.0525046586990356, "learning_rate": 2.8601379310344828e-05, "loss": 0.1317, "step": 10297 }, { "epoch": 35.51034482758621, "grad_norm": 0.8500874042510986, "learning_rate": 2.8600919540229887e-05, "loss": 0.1345, "step": 10298 }, { "epoch": 35.51379310344828, "grad_norm": 1.0794681310653687, "learning_rate": 2.8600459770114942e-05, "loss": 0.161, "step": 10299 }, { "epoch": 35.51724137931034, "grad_norm": 1.21353280544281, "learning_rate": 2.86e-05, "loss": 0.2237, "step": 10300 }, { "epoch": 35.52068965517241, "grad_norm": 0.8221877217292786, "learning_rate": 2.859954022988506e-05, "loss": 0.2638, "step": 10301 }, { "epoch": 35.52413793103448, "grad_norm": 0.7083932757377625, "learning_rate": 2.8599080459770115e-05, "loss": 0.2131, "step": 10302 }, { "epoch": 35.52758620689655, "grad_norm": 0.529538094997406, "learning_rate": 2.8598620689655173e-05, "loss": 0.2262, "step": 10303 }, { "epoch": 35.53103448275862, "grad_norm": 0.8539184927940369, "learning_rate": 2.8598160919540232e-05, "loss": 0.2184, "step": 10304 }, { "epoch": 35.53448275862069, "grad_norm": 0.6055418848991394, "learning_rate": 2.8597701149425287e-05, "loss": 0.1956, "step": 10305 }, { "epoch": 35.53793103448276, "grad_norm": 0.9483448266983032, "learning_rate": 2.8597241379310346e-05, "loss": 0.1707, "step": 10306 }, { "epoch": 35.54137931034483, "grad_norm": 0.7510677576065063, "learning_rate": 2.85967816091954e-05, "loss": 0.19, "step": 10307 }, { "epoch": 35.5448275862069, "grad_norm": 0.5474963784217834, "learning_rate": 2.859632183908046e-05, "loss": 0.2064, "step": 10308 }, { "epoch": 35.54827586206896, "grad_norm": 0.7499678730964661, "learning_rate": 2.859586206896552e-05, "loss": 0.1888, "step": 10309 }, { "epoch": 35.55172413793103, "grad_norm": 0.4910551905632019, "learning_rate": 2.8595402298850574e-05, "loss": 0.1634, "step": 10310 }, { "epoch": 35.5551724137931, "grad_norm": 0.9320417642593384, "learning_rate": 2.8594942528735633e-05, "loss": 0.166, "step": 10311 }, { "epoch": 35.55862068965517, "grad_norm": 0.916386067867279, "learning_rate": 2.859448275862069e-05, "loss": 0.2169, "step": 10312 }, { "epoch": 35.56206896551724, "grad_norm": 0.6005914807319641, "learning_rate": 2.8594022988505747e-05, "loss": 0.1621, "step": 10313 }, { "epoch": 35.56551724137931, "grad_norm": 1.186562418937683, "learning_rate": 2.8593563218390805e-05, "loss": 0.1713, "step": 10314 }, { "epoch": 35.56896551724138, "grad_norm": 0.703463077545166, "learning_rate": 2.859310344827586e-05, "loss": 0.1697, "step": 10315 }, { "epoch": 35.57241379310345, "grad_norm": 1.68710458278656, "learning_rate": 2.8592643678160923e-05, "loss": 0.1673, "step": 10316 }, { "epoch": 35.57586206896552, "grad_norm": 1.0603516101837158, "learning_rate": 2.8592183908045978e-05, "loss": 0.157, "step": 10317 }, { "epoch": 35.57931034482758, "grad_norm": 0.6300113201141357, "learning_rate": 2.8591724137931033e-05, "loss": 0.1552, "step": 10318 }, { "epoch": 35.58275862068965, "grad_norm": 0.6221239566802979, "learning_rate": 2.8591264367816092e-05, "loss": 0.1537, "step": 10319 }, { "epoch": 35.58620689655172, "grad_norm": 0.7843586802482605, "learning_rate": 2.859080459770115e-05, "loss": 0.1576, "step": 10320 }, { "epoch": 35.58965517241379, "grad_norm": 0.9407371282577515, "learning_rate": 2.859034482758621e-05, "loss": 0.1848, "step": 10321 }, { "epoch": 35.59310344827586, "grad_norm": 1.2893898487091064, "learning_rate": 2.8589885057471265e-05, "loss": 0.13, "step": 10322 }, { "epoch": 35.59655172413793, "grad_norm": 0.7452577352523804, "learning_rate": 2.858942528735632e-05, "loss": 0.1649, "step": 10323 }, { "epoch": 35.6, "grad_norm": 1.2516206502914429, "learning_rate": 2.8588965517241382e-05, "loss": 0.1519, "step": 10324 }, { "epoch": 35.60344827586207, "grad_norm": 1.413318157196045, "learning_rate": 2.8588505747126437e-05, "loss": 0.1829, "step": 10325 }, { "epoch": 35.60689655172414, "grad_norm": 0.518332302570343, "learning_rate": 2.8588045977011496e-05, "loss": 0.2383, "step": 10326 }, { "epoch": 35.610344827586204, "grad_norm": 0.5175591111183167, "learning_rate": 2.858758620689655e-05, "loss": 0.2274, "step": 10327 }, { "epoch": 35.61379310344827, "grad_norm": 0.5284023284912109, "learning_rate": 2.858712643678161e-05, "loss": 0.2225, "step": 10328 }, { "epoch": 35.61724137931034, "grad_norm": 0.8050899505615234, "learning_rate": 2.858666666666667e-05, "loss": 0.1966, "step": 10329 }, { "epoch": 35.62068965517241, "grad_norm": 0.5646608471870422, "learning_rate": 2.8586206896551724e-05, "loss": 0.1964, "step": 10330 }, { "epoch": 35.62413793103448, "grad_norm": 0.7097304463386536, "learning_rate": 2.8585747126436783e-05, "loss": 0.1937, "step": 10331 }, { "epoch": 35.62758620689655, "grad_norm": 0.8312870264053345, "learning_rate": 2.858528735632184e-05, "loss": 0.1709, "step": 10332 }, { "epoch": 35.63103448275862, "grad_norm": 1.9555140733718872, "learning_rate": 2.8584827586206896e-05, "loss": 0.1994, "step": 10333 }, { "epoch": 35.63448275862069, "grad_norm": 0.5473332405090332, "learning_rate": 2.8584367816091955e-05, "loss": 0.1649, "step": 10334 }, { "epoch": 35.63793103448276, "grad_norm": 0.8518280386924744, "learning_rate": 2.858390804597701e-05, "loss": 0.1677, "step": 10335 }, { "epoch": 35.641379310344824, "grad_norm": 0.572515070438385, "learning_rate": 2.858344827586207e-05, "loss": 0.1874, "step": 10336 }, { "epoch": 35.644827586206894, "grad_norm": 1.9041945934295654, "learning_rate": 2.8582988505747128e-05, "loss": 0.1559, "step": 10337 }, { "epoch": 35.648275862068964, "grad_norm": 0.6052646636962891, "learning_rate": 2.8582528735632183e-05, "loss": 0.1725, "step": 10338 }, { "epoch": 35.65172413793103, "grad_norm": 0.7605915665626526, "learning_rate": 2.8582068965517242e-05, "loss": 0.1822, "step": 10339 }, { "epoch": 35.6551724137931, "grad_norm": 0.7756117582321167, "learning_rate": 2.85816091954023e-05, "loss": 0.1524, "step": 10340 }, { "epoch": 35.65862068965517, "grad_norm": 0.7958968877792358, "learning_rate": 2.8581149425287356e-05, "loss": 0.1722, "step": 10341 }, { "epoch": 35.66206896551724, "grad_norm": 0.6221120357513428, "learning_rate": 2.8580689655172414e-05, "loss": 0.1583, "step": 10342 }, { "epoch": 35.66551724137931, "grad_norm": 0.6699879169464111, "learning_rate": 2.858022988505747e-05, "loss": 0.1422, "step": 10343 }, { "epoch": 35.66896551724138, "grad_norm": 0.9143997430801392, "learning_rate": 2.8579770114942532e-05, "loss": 0.1726, "step": 10344 }, { "epoch": 35.672413793103445, "grad_norm": 0.6610894799232483, "learning_rate": 2.8579310344827587e-05, "loss": 0.1353, "step": 10345 }, { "epoch": 35.675862068965515, "grad_norm": 0.829464852809906, "learning_rate": 2.8578850574712642e-05, "loss": 0.1762, "step": 10346 }, { "epoch": 35.679310344827584, "grad_norm": 0.8074849843978882, "learning_rate": 2.85783908045977e-05, "loss": 0.1497, "step": 10347 }, { "epoch": 35.682758620689654, "grad_norm": 0.8276913166046143, "learning_rate": 2.857793103448276e-05, "loss": 0.1284, "step": 10348 }, { "epoch": 35.686206896551724, "grad_norm": 0.9498240947723389, "learning_rate": 2.857747126436782e-05, "loss": 0.1604, "step": 10349 }, { "epoch": 35.689655172413794, "grad_norm": 1.6855474710464478, "learning_rate": 2.8577011494252874e-05, "loss": 0.2394, "step": 10350 }, { "epoch": 35.69310344827586, "grad_norm": 1.4327832460403442, "learning_rate": 2.857655172413793e-05, "loss": 0.2339, "step": 10351 }, { "epoch": 35.69655172413793, "grad_norm": 0.6611794233322144, "learning_rate": 2.857609195402299e-05, "loss": 0.2058, "step": 10352 }, { "epoch": 35.7, "grad_norm": 0.49212613701820374, "learning_rate": 2.8575632183908046e-05, "loss": 0.2059, "step": 10353 }, { "epoch": 35.703448275862065, "grad_norm": 0.5268010497093201, "learning_rate": 2.8575172413793105e-05, "loss": 0.2124, "step": 10354 }, { "epoch": 35.706896551724135, "grad_norm": 0.7500028014183044, "learning_rate": 2.857471264367816e-05, "loss": 0.1878, "step": 10355 }, { "epoch": 35.710344827586205, "grad_norm": 0.8072144389152527, "learning_rate": 2.857425287356322e-05, "loss": 0.2081, "step": 10356 }, { "epoch": 35.713793103448275, "grad_norm": 0.8634318113327026, "learning_rate": 2.8573793103448278e-05, "loss": 0.2026, "step": 10357 }, { "epoch": 35.717241379310344, "grad_norm": 0.5068686008453369, "learning_rate": 2.8573333333333333e-05, "loss": 0.1816, "step": 10358 }, { "epoch": 35.720689655172414, "grad_norm": 1.073385238647461, "learning_rate": 2.8572873563218392e-05, "loss": 0.1735, "step": 10359 }, { "epoch": 35.724137931034484, "grad_norm": 0.4670589864253998, "learning_rate": 2.857241379310345e-05, "loss": 0.1787, "step": 10360 }, { "epoch": 35.727586206896554, "grad_norm": 1.0959296226501465, "learning_rate": 2.8571954022988506e-05, "loss": 0.1776, "step": 10361 }, { "epoch": 35.73103448275862, "grad_norm": 0.5742167830467224, "learning_rate": 2.8571494252873564e-05, "loss": 0.1777, "step": 10362 }, { "epoch": 35.734482758620686, "grad_norm": 0.7791165709495544, "learning_rate": 2.857103448275862e-05, "loss": 0.192, "step": 10363 }, { "epoch": 35.737931034482756, "grad_norm": 2.1882145404815674, "learning_rate": 2.857057471264368e-05, "loss": 0.1707, "step": 10364 }, { "epoch": 35.741379310344826, "grad_norm": 0.7021902203559875, "learning_rate": 2.8570114942528737e-05, "loss": 0.201, "step": 10365 }, { "epoch": 35.744827586206895, "grad_norm": 0.6052233576774597, "learning_rate": 2.8569655172413792e-05, "loss": 0.1573, "step": 10366 }, { "epoch": 35.748275862068965, "grad_norm": 0.7013341784477234, "learning_rate": 2.856919540229885e-05, "loss": 0.1628, "step": 10367 }, { "epoch": 35.751724137931035, "grad_norm": 0.5399353504180908, "learning_rate": 2.856873563218391e-05, "loss": 0.144, "step": 10368 }, { "epoch": 35.755172413793105, "grad_norm": 1.0603095293045044, "learning_rate": 2.8568275862068965e-05, "loss": 0.1601, "step": 10369 }, { "epoch": 35.758620689655174, "grad_norm": 1.2693699598312378, "learning_rate": 2.8567816091954024e-05, "loss": 0.1764, "step": 10370 }, { "epoch": 35.762068965517244, "grad_norm": 0.6678012609481812, "learning_rate": 2.856735632183908e-05, "loss": 0.1346, "step": 10371 }, { "epoch": 35.765517241379314, "grad_norm": 0.8113283514976501, "learning_rate": 2.856689655172414e-05, "loss": 0.1423, "step": 10372 }, { "epoch": 35.76896551724138, "grad_norm": 0.755314290523529, "learning_rate": 2.8566436781609196e-05, "loss": 0.1425, "step": 10373 }, { "epoch": 35.772413793103446, "grad_norm": 1.471761703491211, "learning_rate": 2.856597701149425e-05, "loss": 0.1768, "step": 10374 }, { "epoch": 35.775862068965516, "grad_norm": 1.8103656768798828, "learning_rate": 2.856551724137931e-05, "loss": 0.2324, "step": 10375 }, { "epoch": 35.779310344827586, "grad_norm": 1.970650553703308, "learning_rate": 2.856505747126437e-05, "loss": 0.226, "step": 10376 }, { "epoch": 35.782758620689656, "grad_norm": 1.0091722011566162, "learning_rate": 2.8564597701149428e-05, "loss": 0.2328, "step": 10377 }, { "epoch": 35.786206896551725, "grad_norm": 0.5555372834205627, "learning_rate": 2.8564137931034483e-05, "loss": 0.2149, "step": 10378 }, { "epoch": 35.789655172413795, "grad_norm": 0.4518444240093231, "learning_rate": 2.856367816091954e-05, "loss": 0.1808, "step": 10379 }, { "epoch": 35.793103448275865, "grad_norm": 0.6552831530570984, "learning_rate": 2.85632183908046e-05, "loss": 0.1782, "step": 10380 }, { "epoch": 35.796551724137935, "grad_norm": 0.6576346755027771, "learning_rate": 2.8562758620689656e-05, "loss": 0.1955, "step": 10381 }, { "epoch": 35.8, "grad_norm": 0.6859720349311829, "learning_rate": 2.8562298850574714e-05, "loss": 0.1839, "step": 10382 }, { "epoch": 35.80344827586207, "grad_norm": 0.6651900410652161, "learning_rate": 2.856183908045977e-05, "loss": 0.2031, "step": 10383 }, { "epoch": 35.80689655172414, "grad_norm": 1.2819304466247559, "learning_rate": 2.856137931034483e-05, "loss": 0.1615, "step": 10384 }, { "epoch": 35.810344827586206, "grad_norm": 0.9333711266517639, "learning_rate": 2.8560919540229887e-05, "loss": 0.1929, "step": 10385 }, { "epoch": 35.813793103448276, "grad_norm": 0.45594725012779236, "learning_rate": 2.8560459770114942e-05, "loss": 0.1694, "step": 10386 }, { "epoch": 35.817241379310346, "grad_norm": 0.7285921573638916, "learning_rate": 2.856e-05, "loss": 0.1739, "step": 10387 }, { "epoch": 35.820689655172416, "grad_norm": 0.5936203598976135, "learning_rate": 2.855954022988506e-05, "loss": 0.1479, "step": 10388 }, { "epoch": 35.824137931034485, "grad_norm": 1.419968843460083, "learning_rate": 2.8559080459770115e-05, "loss": 0.1667, "step": 10389 }, { "epoch": 35.827586206896555, "grad_norm": 0.7547966241836548, "learning_rate": 2.8558620689655174e-05, "loss": 0.1777, "step": 10390 }, { "epoch": 35.83103448275862, "grad_norm": 0.8019425868988037, "learning_rate": 2.855816091954023e-05, "loss": 0.1688, "step": 10391 }, { "epoch": 35.83448275862069, "grad_norm": 0.8174415826797485, "learning_rate": 2.8557701149425288e-05, "loss": 0.1751, "step": 10392 }, { "epoch": 35.83793103448276, "grad_norm": 0.717470645904541, "learning_rate": 2.8557241379310346e-05, "loss": 0.1747, "step": 10393 }, { "epoch": 35.84137931034483, "grad_norm": 0.9046068787574768, "learning_rate": 2.85567816091954e-05, "loss": 0.1792, "step": 10394 }, { "epoch": 35.8448275862069, "grad_norm": 1.0335919857025146, "learning_rate": 2.855632183908046e-05, "loss": 0.1581, "step": 10395 }, { "epoch": 35.84827586206897, "grad_norm": 1.7737821340560913, "learning_rate": 2.855586206896552e-05, "loss": 0.1559, "step": 10396 }, { "epoch": 35.851724137931036, "grad_norm": 0.8539853692054749, "learning_rate": 2.8555402298850574e-05, "loss": 0.164, "step": 10397 }, { "epoch": 35.855172413793106, "grad_norm": 1.1912070512771606, "learning_rate": 2.8554942528735633e-05, "loss": 0.1868, "step": 10398 }, { "epoch": 35.858620689655176, "grad_norm": 1.0280811786651611, "learning_rate": 2.8554482758620688e-05, "loss": 0.1959, "step": 10399 }, { "epoch": 35.86206896551724, "grad_norm": 1.689182996749878, "learning_rate": 2.855402298850575e-05, "loss": 0.2185, "step": 10400 }, { "epoch": 35.86551724137931, "grad_norm": 0.812036395072937, "learning_rate": 2.8553563218390806e-05, "loss": 0.235, "step": 10401 }, { "epoch": 35.86896551724138, "grad_norm": 0.6789072155952454, "learning_rate": 2.855310344827586e-05, "loss": 0.2002, "step": 10402 }, { "epoch": 35.87241379310345, "grad_norm": 0.6655060052871704, "learning_rate": 2.855264367816092e-05, "loss": 0.2157, "step": 10403 }, { "epoch": 35.87586206896552, "grad_norm": 0.45126986503601074, "learning_rate": 2.8552183908045978e-05, "loss": 0.1896, "step": 10404 }, { "epoch": 35.87931034482759, "grad_norm": 0.7045274376869202, "learning_rate": 2.8551724137931037e-05, "loss": 0.2036, "step": 10405 }, { "epoch": 35.88275862068966, "grad_norm": 1.079421043395996, "learning_rate": 2.8551264367816092e-05, "loss": 0.1923, "step": 10406 }, { "epoch": 35.88620689655173, "grad_norm": 1.6206239461898804, "learning_rate": 2.8550804597701148e-05, "loss": 0.1752, "step": 10407 }, { "epoch": 35.889655172413796, "grad_norm": 0.5325156450271606, "learning_rate": 2.855034482758621e-05, "loss": 0.1741, "step": 10408 }, { "epoch": 35.89310344827586, "grad_norm": 0.6551870107650757, "learning_rate": 2.8549885057471265e-05, "loss": 0.1835, "step": 10409 }, { "epoch": 35.89655172413793, "grad_norm": 0.9337643384933472, "learning_rate": 2.8549425287356324e-05, "loss": 0.1795, "step": 10410 }, { "epoch": 35.9, "grad_norm": 0.47067779302597046, "learning_rate": 2.854896551724138e-05, "loss": 0.1567, "step": 10411 }, { "epoch": 35.90344827586207, "grad_norm": 1.178234577178955, "learning_rate": 2.8548505747126438e-05, "loss": 0.17, "step": 10412 }, { "epoch": 35.90689655172414, "grad_norm": 0.7222755551338196, "learning_rate": 2.8548045977011496e-05, "loss": 0.1677, "step": 10413 }, { "epoch": 35.91034482758621, "grad_norm": 0.8576640486717224, "learning_rate": 2.854758620689655e-05, "loss": 0.1617, "step": 10414 }, { "epoch": 35.91379310344828, "grad_norm": 0.5796521902084351, "learning_rate": 2.854712643678161e-05, "loss": 0.1671, "step": 10415 }, { "epoch": 35.91724137931035, "grad_norm": 0.6408844590187073, "learning_rate": 2.854666666666667e-05, "loss": 0.1541, "step": 10416 }, { "epoch": 35.92068965517242, "grad_norm": 0.8083669543266296, "learning_rate": 2.8546206896551724e-05, "loss": 0.1827, "step": 10417 }, { "epoch": 35.92413793103448, "grad_norm": 1.069996953010559, "learning_rate": 2.8545747126436783e-05, "loss": 0.1576, "step": 10418 }, { "epoch": 35.92758620689655, "grad_norm": 1.2606208324432373, "learning_rate": 2.8545287356321838e-05, "loss": 0.153, "step": 10419 }, { "epoch": 35.93103448275862, "grad_norm": 0.6602324843406677, "learning_rate": 2.85448275862069e-05, "loss": 0.1551, "step": 10420 }, { "epoch": 35.93448275862069, "grad_norm": 0.944200873374939, "learning_rate": 2.8544367816091956e-05, "loss": 0.1512, "step": 10421 }, { "epoch": 35.93793103448276, "grad_norm": 1.1788675785064697, "learning_rate": 2.854390804597701e-05, "loss": 0.1557, "step": 10422 }, { "epoch": 35.94137931034483, "grad_norm": 0.8357532620429993, "learning_rate": 2.854344827586207e-05, "loss": 0.1478, "step": 10423 }, { "epoch": 35.9448275862069, "grad_norm": 1.2528945207595825, "learning_rate": 2.8542988505747128e-05, "loss": 0.197, "step": 10424 }, { "epoch": 35.94827586206897, "grad_norm": 1.26723313331604, "learning_rate": 2.8542528735632184e-05, "loss": 0.2122, "step": 10425 }, { "epoch": 35.95172413793104, "grad_norm": 0.6390089988708496, "learning_rate": 2.8542068965517242e-05, "loss": 0.246, "step": 10426 }, { "epoch": 35.9551724137931, "grad_norm": 1.139783263206482, "learning_rate": 2.8541609195402297e-05, "loss": 0.2047, "step": 10427 }, { "epoch": 35.95862068965517, "grad_norm": 1.32783842086792, "learning_rate": 2.854114942528736e-05, "loss": 0.2018, "step": 10428 }, { "epoch": 35.96206896551724, "grad_norm": 0.6505367159843445, "learning_rate": 2.8540689655172415e-05, "loss": 0.1805, "step": 10429 }, { "epoch": 35.96551724137931, "grad_norm": 0.8855769038200378, "learning_rate": 2.854022988505747e-05, "loss": 0.1813, "step": 10430 }, { "epoch": 35.96896551724138, "grad_norm": 0.6777186393737793, "learning_rate": 2.853977011494253e-05, "loss": 0.1876, "step": 10431 }, { "epoch": 35.97241379310345, "grad_norm": 0.6614531874656677, "learning_rate": 2.8539310344827588e-05, "loss": 0.1988, "step": 10432 }, { "epoch": 35.97586206896552, "grad_norm": 2.1124305725097656, "learning_rate": 2.8538850574712646e-05, "loss": 0.1671, "step": 10433 }, { "epoch": 35.97931034482759, "grad_norm": 1.0894685983657837, "learning_rate": 2.85383908045977e-05, "loss": 0.1748, "step": 10434 }, { "epoch": 35.98275862068966, "grad_norm": 0.5889418125152588, "learning_rate": 2.8537931034482757e-05, "loss": 0.1633, "step": 10435 }, { "epoch": 35.98620689655172, "grad_norm": 0.8485708832740784, "learning_rate": 2.853747126436782e-05, "loss": 0.1754, "step": 10436 }, { "epoch": 35.98965517241379, "grad_norm": 1.1191381216049194, "learning_rate": 2.8537011494252874e-05, "loss": 0.174, "step": 10437 }, { "epoch": 35.99310344827586, "grad_norm": 0.6538047194480896, "learning_rate": 2.8536551724137933e-05, "loss": 0.1318, "step": 10438 }, { "epoch": 35.99655172413793, "grad_norm": 0.898048460483551, "learning_rate": 2.8536091954022988e-05, "loss": 0.162, "step": 10439 }, { "epoch": 36.0, "grad_norm": 0.9888994097709656, "learning_rate": 2.8535632183908047e-05, "loss": 0.2422, "step": 10440 }, { "epoch": 36.00344827586207, "grad_norm": 0.5813559889793396, "learning_rate": 2.8535172413793105e-05, "loss": 0.2422, "step": 10441 }, { "epoch": 36.00689655172414, "grad_norm": 0.7736622095108032, "learning_rate": 2.853471264367816e-05, "loss": 0.1817, "step": 10442 }, { "epoch": 36.01034482758621, "grad_norm": 1.0966196060180664, "learning_rate": 2.853425287356322e-05, "loss": 0.2112, "step": 10443 }, { "epoch": 36.01379310344828, "grad_norm": 0.5968170762062073, "learning_rate": 2.8533793103448278e-05, "loss": 0.2032, "step": 10444 }, { "epoch": 36.01724137931034, "grad_norm": 0.7367808222770691, "learning_rate": 2.8533333333333333e-05, "loss": 0.1874, "step": 10445 }, { "epoch": 36.02068965517241, "grad_norm": 0.9025722146034241, "learning_rate": 2.8532873563218392e-05, "loss": 0.1929, "step": 10446 }, { "epoch": 36.02413793103448, "grad_norm": 1.6308329105377197, "learning_rate": 2.8532413793103447e-05, "loss": 0.1934, "step": 10447 }, { "epoch": 36.02758620689655, "grad_norm": 0.7294802665710449, "learning_rate": 2.853195402298851e-05, "loss": 0.1796, "step": 10448 }, { "epoch": 36.03103448275862, "grad_norm": 0.9212831854820251, "learning_rate": 2.8531494252873565e-05, "loss": 0.1935, "step": 10449 }, { "epoch": 36.03448275862069, "grad_norm": 1.3328993320465088, "learning_rate": 2.853103448275862e-05, "loss": 0.1674, "step": 10450 }, { "epoch": 36.03793103448276, "grad_norm": 1.0410634279251099, "learning_rate": 2.853057471264368e-05, "loss": 0.1737, "step": 10451 }, { "epoch": 36.04137931034483, "grad_norm": 1.1005738973617554, "learning_rate": 2.8530114942528737e-05, "loss": 0.1794, "step": 10452 }, { "epoch": 36.0448275862069, "grad_norm": 0.9916546940803528, "learning_rate": 2.8529655172413793e-05, "loss": 0.1532, "step": 10453 }, { "epoch": 36.04827586206896, "grad_norm": 0.798786461353302, "learning_rate": 2.852919540229885e-05, "loss": 0.1503, "step": 10454 }, { "epoch": 36.05172413793103, "grad_norm": 0.547981321811676, "learning_rate": 2.8528735632183907e-05, "loss": 0.1752, "step": 10455 }, { "epoch": 36.0551724137931, "grad_norm": 0.67449951171875, "learning_rate": 2.852827586206897e-05, "loss": 0.1457, "step": 10456 }, { "epoch": 36.05862068965517, "grad_norm": 1.808521032333374, "learning_rate": 2.8527816091954024e-05, "loss": 0.1522, "step": 10457 }, { "epoch": 36.06206896551724, "grad_norm": 1.0026878118515015, "learning_rate": 2.852735632183908e-05, "loss": 0.1456, "step": 10458 }, { "epoch": 36.06551724137931, "grad_norm": 0.760675847530365, "learning_rate": 2.8526896551724138e-05, "loss": 0.1597, "step": 10459 }, { "epoch": 36.06896551724138, "grad_norm": 0.747279167175293, "learning_rate": 2.8526436781609197e-05, "loss": 0.1635, "step": 10460 }, { "epoch": 36.07241379310345, "grad_norm": 0.8536329865455627, "learning_rate": 2.8525977011494255e-05, "loss": 0.1439, "step": 10461 }, { "epoch": 36.07586206896552, "grad_norm": 0.7876914143562317, "learning_rate": 2.852551724137931e-05, "loss": 0.1233, "step": 10462 }, { "epoch": 36.07931034482758, "grad_norm": 0.8344675898551941, "learning_rate": 2.8525057471264366e-05, "loss": 0.136, "step": 10463 }, { "epoch": 36.08275862068965, "grad_norm": 0.7839145064353943, "learning_rate": 2.8524597701149428e-05, "loss": 0.1426, "step": 10464 }, { "epoch": 36.08620689655172, "grad_norm": 1.1594339609146118, "learning_rate": 2.8524137931034483e-05, "loss": 0.2, "step": 10465 }, { "epoch": 36.08965517241379, "grad_norm": 0.6043087840080261, "learning_rate": 2.8523678160919542e-05, "loss": 0.2205, "step": 10466 }, { "epoch": 36.09310344827586, "grad_norm": 0.4692460298538208, "learning_rate": 2.8523218390804597e-05, "loss": 0.193, "step": 10467 }, { "epoch": 36.09655172413793, "grad_norm": 0.5886786580085754, "learning_rate": 2.8522758620689656e-05, "loss": 0.1982, "step": 10468 }, { "epoch": 36.1, "grad_norm": 1.181617021560669, "learning_rate": 2.8522298850574715e-05, "loss": 0.2089, "step": 10469 }, { "epoch": 36.10344827586207, "grad_norm": 1.0383490324020386, "learning_rate": 2.852183908045977e-05, "loss": 0.1833, "step": 10470 }, { "epoch": 36.10689655172414, "grad_norm": 0.5755918025970459, "learning_rate": 2.852137931034483e-05, "loss": 0.1783, "step": 10471 }, { "epoch": 36.110344827586204, "grad_norm": 0.5168686509132385, "learning_rate": 2.8520919540229887e-05, "loss": 0.1833, "step": 10472 }, { "epoch": 36.11379310344827, "grad_norm": 0.5767465829849243, "learning_rate": 2.8520459770114943e-05, "loss": 0.1736, "step": 10473 }, { "epoch": 36.11724137931034, "grad_norm": 0.738239586353302, "learning_rate": 2.852e-05, "loss": 0.1647, "step": 10474 }, { "epoch": 36.12068965517241, "grad_norm": 1.0700907707214355, "learning_rate": 2.8519540229885057e-05, "loss": 0.1683, "step": 10475 }, { "epoch": 36.12413793103448, "grad_norm": 0.67559814453125, "learning_rate": 2.851908045977012e-05, "loss": 0.1748, "step": 10476 }, { "epoch": 36.12758620689655, "grad_norm": 0.6716015934944153, "learning_rate": 2.8518620689655174e-05, "loss": 0.1597, "step": 10477 }, { "epoch": 36.13103448275862, "grad_norm": 0.964948296546936, "learning_rate": 2.851816091954023e-05, "loss": 0.1641, "step": 10478 }, { "epoch": 36.13448275862069, "grad_norm": 7.844889163970947, "learning_rate": 2.8517701149425288e-05, "loss": 0.1492, "step": 10479 }, { "epoch": 36.13793103448276, "grad_norm": 0.7213267683982849, "learning_rate": 2.8517241379310347e-05, "loss": 0.1675, "step": 10480 }, { "epoch": 36.141379310344824, "grad_norm": 1.0159494876861572, "learning_rate": 2.8516781609195402e-05, "loss": 0.1408, "step": 10481 }, { "epoch": 36.144827586206894, "grad_norm": 0.9904018640518188, "learning_rate": 2.851632183908046e-05, "loss": 0.1473, "step": 10482 }, { "epoch": 36.148275862068964, "grad_norm": 1.0785990953445435, "learning_rate": 2.8515862068965516e-05, "loss": 0.1653, "step": 10483 }, { "epoch": 36.15172413793103, "grad_norm": 0.8579023480415344, "learning_rate": 2.8515402298850578e-05, "loss": 0.1437, "step": 10484 }, { "epoch": 36.1551724137931, "grad_norm": 0.9088259339332581, "learning_rate": 2.8514942528735633e-05, "loss": 0.1432, "step": 10485 }, { "epoch": 36.15862068965517, "grad_norm": 1.142698049545288, "learning_rate": 2.851448275862069e-05, "loss": 0.1573, "step": 10486 }, { "epoch": 36.16206896551724, "grad_norm": 0.6574388742446899, "learning_rate": 2.8514022988505747e-05, "loss": 0.1347, "step": 10487 }, { "epoch": 36.16551724137931, "grad_norm": 0.9330369234085083, "learning_rate": 2.8513563218390806e-05, "loss": 0.1477, "step": 10488 }, { "epoch": 36.16896551724138, "grad_norm": 1.5068362951278687, "learning_rate": 2.8513103448275865e-05, "loss": 0.164, "step": 10489 }, { "epoch": 36.172413793103445, "grad_norm": 1.5394538640975952, "learning_rate": 2.851264367816092e-05, "loss": 0.2232, "step": 10490 }, { "epoch": 36.175862068965515, "grad_norm": 0.8614137768745422, "learning_rate": 2.8512183908045975e-05, "loss": 0.235, "step": 10491 }, { "epoch": 36.179310344827584, "grad_norm": 0.9790729284286499, "learning_rate": 2.8511724137931037e-05, "loss": 0.2228, "step": 10492 }, { "epoch": 36.182758620689654, "grad_norm": 1.0231314897537231, "learning_rate": 2.8511264367816093e-05, "loss": 0.1897, "step": 10493 }, { "epoch": 36.186206896551724, "grad_norm": 0.592785656452179, "learning_rate": 2.851080459770115e-05, "loss": 0.1846, "step": 10494 }, { "epoch": 36.189655172413794, "grad_norm": 0.5411748886108398, "learning_rate": 2.8510344827586207e-05, "loss": 0.1672, "step": 10495 }, { "epoch": 36.19310344827586, "grad_norm": 0.8696944117546082, "learning_rate": 2.8509885057471265e-05, "loss": 0.176, "step": 10496 }, { "epoch": 36.19655172413793, "grad_norm": 0.6341794729232788, "learning_rate": 2.8509425287356324e-05, "loss": 0.1944, "step": 10497 }, { "epoch": 36.2, "grad_norm": 0.7712282538414001, "learning_rate": 2.850896551724138e-05, "loss": 0.1664, "step": 10498 }, { "epoch": 36.203448275862065, "grad_norm": 0.4797876179218292, "learning_rate": 2.8508505747126438e-05, "loss": 0.1803, "step": 10499 }, { "epoch": 36.206896551724135, "grad_norm": 0.6531980037689209, "learning_rate": 2.8508045977011497e-05, "loss": 0.1835, "step": 10500 }, { "epoch": 36.210344827586205, "grad_norm": 0.9956763982772827, "learning_rate": 2.8507586206896552e-05, "loss": 0.1841, "step": 10501 }, { "epoch": 36.213793103448275, "grad_norm": 0.5774454474449158, "learning_rate": 2.850712643678161e-05, "loss": 0.1712, "step": 10502 }, { "epoch": 36.217241379310344, "grad_norm": 0.7493171095848083, "learning_rate": 2.8506666666666666e-05, "loss": 0.1515, "step": 10503 }, { "epoch": 36.220689655172414, "grad_norm": 1.1045128107070923, "learning_rate": 2.8506206896551728e-05, "loss": 0.1484, "step": 10504 }, { "epoch": 36.224137931034484, "grad_norm": 0.5788100361824036, "learning_rate": 2.8505747126436783e-05, "loss": 0.1661, "step": 10505 }, { "epoch": 36.227586206896554, "grad_norm": 0.9339642524719238, "learning_rate": 2.850528735632184e-05, "loss": 0.1528, "step": 10506 }, { "epoch": 36.23103448275862, "grad_norm": 0.7405489683151245, "learning_rate": 2.8504827586206897e-05, "loss": 0.144, "step": 10507 }, { "epoch": 36.234482758620686, "grad_norm": 0.6943523287773132, "learning_rate": 2.8504367816091956e-05, "loss": 0.1359, "step": 10508 }, { "epoch": 36.237931034482756, "grad_norm": 0.8890888094902039, "learning_rate": 2.8503908045977015e-05, "loss": 0.1582, "step": 10509 }, { "epoch": 36.241379310344826, "grad_norm": 1.912851333618164, "learning_rate": 2.850344827586207e-05, "loss": 0.1369, "step": 10510 }, { "epoch": 36.244827586206895, "grad_norm": 1.0719809532165527, "learning_rate": 2.8502988505747125e-05, "loss": 0.1676, "step": 10511 }, { "epoch": 36.248275862068965, "grad_norm": 0.7688038349151611, "learning_rate": 2.8502528735632187e-05, "loss": 0.1452, "step": 10512 }, { "epoch": 36.251724137931035, "grad_norm": 0.9972187876701355, "learning_rate": 2.8502068965517243e-05, "loss": 0.1272, "step": 10513 }, { "epoch": 36.255172413793105, "grad_norm": 1.3029412031173706, "learning_rate": 2.8501609195402298e-05, "loss": 0.1284, "step": 10514 }, { "epoch": 36.258620689655174, "grad_norm": 1.0832159519195557, "learning_rate": 2.8501149425287357e-05, "loss": 0.1831, "step": 10515 }, { "epoch": 36.262068965517244, "grad_norm": 0.559241771697998, "learning_rate": 2.8500689655172415e-05, "loss": 0.2341, "step": 10516 }, { "epoch": 36.265517241379314, "grad_norm": 0.5437224507331848, "learning_rate": 2.8500229885057474e-05, "loss": 0.2211, "step": 10517 }, { "epoch": 36.26896551724138, "grad_norm": 0.43137261271476746, "learning_rate": 2.849977011494253e-05, "loss": 0.1926, "step": 10518 }, { "epoch": 36.272413793103446, "grad_norm": 0.7585649490356445, "learning_rate": 2.8499310344827585e-05, "loss": 0.1797, "step": 10519 }, { "epoch": 36.275862068965516, "grad_norm": 1.0862505435943604, "learning_rate": 2.8498850574712647e-05, "loss": 0.2107, "step": 10520 }, { "epoch": 36.279310344827586, "grad_norm": 1.1502206325531006, "learning_rate": 2.8498390804597702e-05, "loss": 0.1736, "step": 10521 }, { "epoch": 36.282758620689656, "grad_norm": 1.1045531034469604, "learning_rate": 2.849793103448276e-05, "loss": 0.184, "step": 10522 }, { "epoch": 36.286206896551725, "grad_norm": 0.7183961272239685, "learning_rate": 2.8497471264367816e-05, "loss": 0.181, "step": 10523 }, { "epoch": 36.289655172413795, "grad_norm": 0.684669017791748, "learning_rate": 2.8497011494252875e-05, "loss": 0.1665, "step": 10524 }, { "epoch": 36.293103448275865, "grad_norm": 0.5601993799209595, "learning_rate": 2.8496551724137933e-05, "loss": 0.1901, "step": 10525 }, { "epoch": 36.296551724137935, "grad_norm": 0.6543950438499451, "learning_rate": 2.849609195402299e-05, "loss": 0.1664, "step": 10526 }, { "epoch": 36.3, "grad_norm": 0.8072074055671692, "learning_rate": 2.8495632183908047e-05, "loss": 0.1776, "step": 10527 }, { "epoch": 36.30344827586207, "grad_norm": 0.910457193851471, "learning_rate": 2.8495172413793106e-05, "loss": 0.1591, "step": 10528 }, { "epoch": 36.30689655172414, "grad_norm": 1.2133674621582031, "learning_rate": 2.849471264367816e-05, "loss": 0.177, "step": 10529 }, { "epoch": 36.310344827586206, "grad_norm": 0.8134106397628784, "learning_rate": 2.849425287356322e-05, "loss": 0.153, "step": 10530 }, { "epoch": 36.313793103448276, "grad_norm": 1.2465202808380127, "learning_rate": 2.8493793103448275e-05, "loss": 0.1774, "step": 10531 }, { "epoch": 36.317241379310346, "grad_norm": 1.2709650993347168, "learning_rate": 2.8493333333333337e-05, "loss": 0.1651, "step": 10532 }, { "epoch": 36.320689655172416, "grad_norm": 0.6008318066596985, "learning_rate": 2.8492873563218393e-05, "loss": 0.1373, "step": 10533 }, { "epoch": 36.324137931034485, "grad_norm": 4.683824062347412, "learning_rate": 2.8492413793103448e-05, "loss": 0.1509, "step": 10534 }, { "epoch": 36.327586206896555, "grad_norm": 1.2468163967132568, "learning_rate": 2.8491954022988506e-05, "loss": 0.136, "step": 10535 }, { "epoch": 36.33103448275862, "grad_norm": 0.9098822474479675, "learning_rate": 2.8491494252873565e-05, "loss": 0.1859, "step": 10536 }, { "epoch": 36.33448275862069, "grad_norm": 1.1378865242004395, "learning_rate": 2.8491034482758624e-05, "loss": 0.1461, "step": 10537 }, { "epoch": 36.33793103448276, "grad_norm": 0.6759845614433289, "learning_rate": 2.849057471264368e-05, "loss": 0.1442, "step": 10538 }, { "epoch": 36.34137931034483, "grad_norm": 0.931471049785614, "learning_rate": 2.8490114942528734e-05, "loss": 0.1613, "step": 10539 }, { "epoch": 36.3448275862069, "grad_norm": 1.1854982376098633, "learning_rate": 2.8489655172413797e-05, "loss": 0.221, "step": 10540 }, { "epoch": 36.34827586206897, "grad_norm": 0.8214738368988037, "learning_rate": 2.8489195402298852e-05, "loss": 0.2598, "step": 10541 }, { "epoch": 36.351724137931036, "grad_norm": 0.6040201187133789, "learning_rate": 2.8488735632183907e-05, "loss": 0.1991, "step": 10542 }, { "epoch": 36.355172413793106, "grad_norm": 0.4871618449687958, "learning_rate": 2.8488275862068966e-05, "loss": 0.1947, "step": 10543 }, { "epoch": 36.358620689655176, "grad_norm": 0.6269735097885132, "learning_rate": 2.8487816091954024e-05, "loss": 0.1706, "step": 10544 }, { "epoch": 36.36206896551724, "grad_norm": 1.0456539392471313, "learning_rate": 2.8487356321839083e-05, "loss": 0.1971, "step": 10545 }, { "epoch": 36.36551724137931, "grad_norm": 0.6337583065032959, "learning_rate": 2.848689655172414e-05, "loss": 0.1729, "step": 10546 }, { "epoch": 36.36896551724138, "grad_norm": 0.569581925868988, "learning_rate": 2.8486436781609194e-05, "loss": 0.1771, "step": 10547 }, { "epoch": 36.37241379310345, "grad_norm": 0.7392777800559998, "learning_rate": 2.8485977011494252e-05, "loss": 0.1862, "step": 10548 }, { "epoch": 36.37586206896552, "grad_norm": 1.530870795249939, "learning_rate": 2.848551724137931e-05, "loss": 0.1751, "step": 10549 }, { "epoch": 36.37931034482759, "grad_norm": 0.5288779139518738, "learning_rate": 2.848505747126437e-05, "loss": 0.1606, "step": 10550 }, { "epoch": 36.38275862068966, "grad_norm": 1.3133635520935059, "learning_rate": 2.8484597701149425e-05, "loss": 0.16, "step": 10551 }, { "epoch": 36.38620689655173, "grad_norm": 2.7269318103790283, "learning_rate": 2.848413793103448e-05, "loss": 0.1718, "step": 10552 }, { "epoch": 36.389655172413796, "grad_norm": 1.6039531230926514, "learning_rate": 2.8483678160919542e-05, "loss": 0.1645, "step": 10553 }, { "epoch": 36.39310344827586, "grad_norm": 0.670252799987793, "learning_rate": 2.8483218390804598e-05, "loss": 0.157, "step": 10554 }, { "epoch": 36.39655172413793, "grad_norm": 0.8392021656036377, "learning_rate": 2.8482758620689656e-05, "loss": 0.1637, "step": 10555 }, { "epoch": 36.4, "grad_norm": 2.4055113792419434, "learning_rate": 2.8482298850574712e-05, "loss": 0.1592, "step": 10556 }, { "epoch": 36.40344827586207, "grad_norm": 0.6911833882331848, "learning_rate": 2.848183908045977e-05, "loss": 0.1547, "step": 10557 }, { "epoch": 36.40689655172414, "grad_norm": 0.7337626814842224, "learning_rate": 2.848137931034483e-05, "loss": 0.1444, "step": 10558 }, { "epoch": 36.41034482758621, "grad_norm": 1.63606858253479, "learning_rate": 2.8480919540229884e-05, "loss": 0.1342, "step": 10559 }, { "epoch": 36.41379310344828, "grad_norm": 0.836943507194519, "learning_rate": 2.8480459770114943e-05, "loss": 0.1515, "step": 10560 }, { "epoch": 36.41724137931035, "grad_norm": 0.7943999767303467, "learning_rate": 2.8480000000000002e-05, "loss": 0.1566, "step": 10561 }, { "epoch": 36.42068965517242, "grad_norm": 1.0311216115951538, "learning_rate": 2.8479540229885057e-05, "loss": 0.1399, "step": 10562 }, { "epoch": 36.42413793103448, "grad_norm": 0.9170905351638794, "learning_rate": 2.8479080459770116e-05, "loss": 0.1297, "step": 10563 }, { "epoch": 36.42758620689655, "grad_norm": 3.7997779846191406, "learning_rate": 2.847862068965517e-05, "loss": 0.1256, "step": 10564 }, { "epoch": 36.43103448275862, "grad_norm": 1.65092134475708, "learning_rate": 2.8478160919540233e-05, "loss": 0.2192, "step": 10565 }, { "epoch": 36.43448275862069, "grad_norm": 0.7837083339691162, "learning_rate": 2.847770114942529e-05, "loss": 0.2559, "step": 10566 }, { "epoch": 36.43793103448276, "grad_norm": 0.8362028002738953, "learning_rate": 2.8477241379310344e-05, "loss": 0.2105, "step": 10567 }, { "epoch": 36.44137931034483, "grad_norm": 0.8651548027992249, "learning_rate": 2.8476781609195402e-05, "loss": 0.2022, "step": 10568 }, { "epoch": 36.4448275862069, "grad_norm": 0.6275559067726135, "learning_rate": 2.847632183908046e-05, "loss": 0.1946, "step": 10569 }, { "epoch": 36.44827586206897, "grad_norm": 0.8254584074020386, "learning_rate": 2.8475862068965516e-05, "loss": 0.1956, "step": 10570 }, { "epoch": 36.45172413793104, "grad_norm": 0.6913663148880005, "learning_rate": 2.8475402298850575e-05, "loss": 0.1974, "step": 10571 }, { "epoch": 36.4551724137931, "grad_norm": 0.6076120734214783, "learning_rate": 2.847494252873563e-05, "loss": 0.1932, "step": 10572 }, { "epoch": 36.45862068965517, "grad_norm": 0.7563843131065369, "learning_rate": 2.8474482758620692e-05, "loss": 0.1937, "step": 10573 }, { "epoch": 36.46206896551724, "grad_norm": 0.6347376108169556, "learning_rate": 2.8474022988505748e-05, "loss": 0.1819, "step": 10574 }, { "epoch": 36.46551724137931, "grad_norm": 0.8092750310897827, "learning_rate": 2.8473563218390803e-05, "loss": 0.1573, "step": 10575 }, { "epoch": 36.46896551724138, "grad_norm": 0.7579020261764526, "learning_rate": 2.847310344827586e-05, "loss": 0.1572, "step": 10576 }, { "epoch": 36.47241379310345, "grad_norm": 0.718278706073761, "learning_rate": 2.847264367816092e-05, "loss": 0.1663, "step": 10577 }, { "epoch": 36.47586206896552, "grad_norm": 1.2597171068191528, "learning_rate": 2.847218390804598e-05, "loss": 0.1724, "step": 10578 }, { "epoch": 36.47931034482759, "grad_norm": 0.8201623558998108, "learning_rate": 2.8471724137931034e-05, "loss": 0.1676, "step": 10579 }, { "epoch": 36.48275862068966, "grad_norm": 1.0106804370880127, "learning_rate": 2.847126436781609e-05, "loss": 0.1428, "step": 10580 }, { "epoch": 36.48620689655172, "grad_norm": 0.549201488494873, "learning_rate": 2.8470804597701152e-05, "loss": 0.1568, "step": 10581 }, { "epoch": 36.48965517241379, "grad_norm": 0.7640755772590637, "learning_rate": 2.8470344827586207e-05, "loss": 0.1659, "step": 10582 }, { "epoch": 36.49310344827586, "grad_norm": 1.1343739032745361, "learning_rate": 2.8469885057471266e-05, "loss": 0.1493, "step": 10583 }, { "epoch": 36.49655172413793, "grad_norm": 0.8765577077865601, "learning_rate": 2.846942528735632e-05, "loss": 0.1301, "step": 10584 }, { "epoch": 36.5, "grad_norm": 0.8215808272361755, "learning_rate": 2.846896551724138e-05, "loss": 0.1407, "step": 10585 }, { "epoch": 36.50344827586207, "grad_norm": 0.8075642585754395, "learning_rate": 2.846850574712644e-05, "loss": 0.1495, "step": 10586 }, { "epoch": 36.50689655172414, "grad_norm": 0.8812727928161621, "learning_rate": 2.8468045977011494e-05, "loss": 0.1315, "step": 10587 }, { "epoch": 36.51034482758621, "grad_norm": 1.1314998865127563, "learning_rate": 2.8467586206896552e-05, "loss": 0.1455, "step": 10588 }, { "epoch": 36.51379310344828, "grad_norm": 0.9448500275611877, "learning_rate": 2.846712643678161e-05, "loss": 0.1228, "step": 10589 }, { "epoch": 36.51724137931034, "grad_norm": 1.2432101964950562, "learning_rate": 2.8466666666666666e-05, "loss": 0.1929, "step": 10590 }, { "epoch": 36.52068965517241, "grad_norm": 0.7791430354118347, "learning_rate": 2.8466206896551725e-05, "loss": 0.234, "step": 10591 }, { "epoch": 36.52413793103448, "grad_norm": 1.0737086534500122, "learning_rate": 2.846574712643678e-05, "loss": 0.2009, "step": 10592 }, { "epoch": 36.52758620689655, "grad_norm": 0.6732809543609619, "learning_rate": 2.8465287356321842e-05, "loss": 0.2237, "step": 10593 }, { "epoch": 36.53103448275862, "grad_norm": 0.8664089441299438, "learning_rate": 2.8464827586206898e-05, "loss": 0.1973, "step": 10594 }, { "epoch": 36.53448275862069, "grad_norm": 1.0970470905303955, "learning_rate": 2.8464367816091953e-05, "loss": 0.2042, "step": 10595 }, { "epoch": 36.53793103448276, "grad_norm": 0.782975971698761, "learning_rate": 2.846390804597701e-05, "loss": 0.1834, "step": 10596 }, { "epoch": 36.54137931034483, "grad_norm": 1.423864483833313, "learning_rate": 2.846344827586207e-05, "loss": 0.1916, "step": 10597 }, { "epoch": 36.5448275862069, "grad_norm": 0.5414683222770691, "learning_rate": 2.846298850574713e-05, "loss": 0.1878, "step": 10598 }, { "epoch": 36.54827586206896, "grad_norm": 3.3046352863311768, "learning_rate": 2.8462528735632184e-05, "loss": 0.1818, "step": 10599 }, { "epoch": 36.55172413793103, "grad_norm": 0.5925872325897217, "learning_rate": 2.846206896551724e-05, "loss": 0.1525, "step": 10600 }, { "epoch": 36.5551724137931, "grad_norm": 1.2766127586364746, "learning_rate": 2.84616091954023e-05, "loss": 0.1842, "step": 10601 }, { "epoch": 36.55862068965517, "grad_norm": 1.1166971921920776, "learning_rate": 2.8461149425287357e-05, "loss": 0.1747, "step": 10602 }, { "epoch": 36.56206896551724, "grad_norm": 0.6434834599494934, "learning_rate": 2.8460689655172412e-05, "loss": 0.1645, "step": 10603 }, { "epoch": 36.56551724137931, "grad_norm": 0.7356259822845459, "learning_rate": 2.846022988505747e-05, "loss": 0.1878, "step": 10604 }, { "epoch": 36.56896551724138, "grad_norm": 0.8893042206764221, "learning_rate": 2.845977011494253e-05, "loss": 0.1824, "step": 10605 }, { "epoch": 36.57241379310345, "grad_norm": 0.6090684533119202, "learning_rate": 2.8459310344827588e-05, "loss": 0.1523, "step": 10606 }, { "epoch": 36.57586206896552, "grad_norm": 1.970232367515564, "learning_rate": 2.8458850574712644e-05, "loss": 0.1691, "step": 10607 }, { "epoch": 36.57931034482758, "grad_norm": 1.078790545463562, "learning_rate": 2.84583908045977e-05, "loss": 0.1714, "step": 10608 }, { "epoch": 36.58275862068965, "grad_norm": 0.8035576343536377, "learning_rate": 2.845793103448276e-05, "loss": 0.1542, "step": 10609 }, { "epoch": 36.58620689655172, "grad_norm": 1.0219744443893433, "learning_rate": 2.8457471264367816e-05, "loss": 0.1469, "step": 10610 }, { "epoch": 36.58965517241379, "grad_norm": 0.904171884059906, "learning_rate": 2.8457011494252875e-05, "loss": 0.1407, "step": 10611 }, { "epoch": 36.59310344827586, "grad_norm": 0.87301105260849, "learning_rate": 2.845655172413793e-05, "loss": 0.119, "step": 10612 }, { "epoch": 36.59655172413793, "grad_norm": 1.1141283512115479, "learning_rate": 2.845609195402299e-05, "loss": 0.1489, "step": 10613 }, { "epoch": 36.6, "grad_norm": 2.995680332183838, "learning_rate": 2.8455632183908048e-05, "loss": 0.1605, "step": 10614 }, { "epoch": 36.60344827586207, "grad_norm": 1.7285150289535522, "learning_rate": 2.8455172413793103e-05, "loss": 0.1688, "step": 10615 }, { "epoch": 36.60689655172414, "grad_norm": 0.6569867134094238, "learning_rate": 2.845471264367816e-05, "loss": 0.2723, "step": 10616 }, { "epoch": 36.610344827586204, "grad_norm": 0.6267418265342712, "learning_rate": 2.845425287356322e-05, "loss": 0.2011, "step": 10617 }, { "epoch": 36.61379310344827, "grad_norm": 0.48151615262031555, "learning_rate": 2.8453793103448276e-05, "loss": 0.1869, "step": 10618 }, { "epoch": 36.61724137931034, "grad_norm": 0.6117608547210693, "learning_rate": 2.8453333333333334e-05, "loss": 0.206, "step": 10619 }, { "epoch": 36.62068965517241, "grad_norm": 0.6179983019828796, "learning_rate": 2.845287356321839e-05, "loss": 0.1783, "step": 10620 }, { "epoch": 36.62413793103448, "grad_norm": 1.074867844581604, "learning_rate": 2.845241379310345e-05, "loss": 0.1923, "step": 10621 }, { "epoch": 36.62758620689655, "grad_norm": 0.558228075504303, "learning_rate": 2.8451954022988507e-05, "loss": 0.1839, "step": 10622 }, { "epoch": 36.63103448275862, "grad_norm": 1.2533555030822754, "learning_rate": 2.8451494252873562e-05, "loss": 0.2026, "step": 10623 }, { "epoch": 36.63448275862069, "grad_norm": 0.5786985158920288, "learning_rate": 2.845103448275862e-05, "loss": 0.1617, "step": 10624 }, { "epoch": 36.63793103448276, "grad_norm": 0.7490576505661011, "learning_rate": 2.845057471264368e-05, "loss": 0.1823, "step": 10625 }, { "epoch": 36.641379310344824, "grad_norm": 0.5869019627571106, "learning_rate": 2.8450114942528738e-05, "loss": 0.1622, "step": 10626 }, { "epoch": 36.644827586206894, "grad_norm": 0.682240903377533, "learning_rate": 2.8449655172413794e-05, "loss": 0.1704, "step": 10627 }, { "epoch": 36.648275862068964, "grad_norm": 15.57264232635498, "learning_rate": 2.844919540229885e-05, "loss": 0.1629, "step": 10628 }, { "epoch": 36.65172413793103, "grad_norm": 0.7239332795143127, "learning_rate": 2.844873563218391e-05, "loss": 0.1785, "step": 10629 }, { "epoch": 36.6551724137931, "grad_norm": 0.954237699508667, "learning_rate": 2.8448275862068966e-05, "loss": 0.1604, "step": 10630 }, { "epoch": 36.65862068965517, "grad_norm": 1.0599231719970703, "learning_rate": 2.844781609195402e-05, "loss": 0.1565, "step": 10631 }, { "epoch": 36.66206896551724, "grad_norm": 0.9431922435760498, "learning_rate": 2.844735632183908e-05, "loss": 0.1678, "step": 10632 }, { "epoch": 36.66551724137931, "grad_norm": 0.5967001914978027, "learning_rate": 2.844689655172414e-05, "loss": 0.1396, "step": 10633 }, { "epoch": 36.66896551724138, "grad_norm": 0.7283697128295898, "learning_rate": 2.8446436781609198e-05, "loss": 0.1312, "step": 10634 }, { "epoch": 36.672413793103445, "grad_norm": 2.758375883102417, "learning_rate": 2.8445977011494253e-05, "loss": 0.1361, "step": 10635 }, { "epoch": 36.675862068965515, "grad_norm": 0.8988423347473145, "learning_rate": 2.8445517241379308e-05, "loss": 0.1363, "step": 10636 }, { "epoch": 36.679310344827584, "grad_norm": 1.3389264345169067, "learning_rate": 2.844505747126437e-05, "loss": 0.1348, "step": 10637 }, { "epoch": 36.682758620689654, "grad_norm": 0.861979067325592, "learning_rate": 2.8444597701149425e-05, "loss": 0.1404, "step": 10638 }, { "epoch": 36.686206896551724, "grad_norm": 0.7938742637634277, "learning_rate": 2.8444137931034484e-05, "loss": 0.1467, "step": 10639 }, { "epoch": 36.689655172413794, "grad_norm": 1.1786937713623047, "learning_rate": 2.844367816091954e-05, "loss": 0.1947, "step": 10640 }, { "epoch": 36.69310344827586, "grad_norm": 0.6098859310150146, "learning_rate": 2.8443218390804598e-05, "loss": 0.2637, "step": 10641 }, { "epoch": 36.69655172413793, "grad_norm": 0.505465567111969, "learning_rate": 2.8442758620689657e-05, "loss": 0.1958, "step": 10642 }, { "epoch": 36.7, "grad_norm": 0.5882567167282104, "learning_rate": 2.8442298850574712e-05, "loss": 0.1902, "step": 10643 }, { "epoch": 36.703448275862065, "grad_norm": 0.535858154296875, "learning_rate": 2.844183908045977e-05, "loss": 0.2091, "step": 10644 }, { "epoch": 36.706896551724135, "grad_norm": 0.6405738592147827, "learning_rate": 2.844137931034483e-05, "loss": 0.1966, "step": 10645 }, { "epoch": 36.710344827586205, "grad_norm": 0.627169668674469, "learning_rate": 2.8440919540229885e-05, "loss": 0.1792, "step": 10646 }, { "epoch": 36.713793103448275, "grad_norm": 0.9906045198440552, "learning_rate": 2.8440459770114943e-05, "loss": 0.2046, "step": 10647 }, { "epoch": 36.717241379310344, "grad_norm": 0.7606669068336487, "learning_rate": 2.844e-05, "loss": 0.1833, "step": 10648 }, { "epoch": 36.720689655172414, "grad_norm": 0.6957988739013672, "learning_rate": 2.843954022988506e-05, "loss": 0.1624, "step": 10649 }, { "epoch": 36.724137931034484, "grad_norm": 0.97575443983078, "learning_rate": 2.8439080459770116e-05, "loss": 0.1612, "step": 10650 }, { "epoch": 36.727586206896554, "grad_norm": 0.8765912652015686, "learning_rate": 2.843862068965517e-05, "loss": 0.1479, "step": 10651 }, { "epoch": 36.73103448275862, "grad_norm": 0.6979538798332214, "learning_rate": 2.843816091954023e-05, "loss": 0.1643, "step": 10652 }, { "epoch": 36.734482758620686, "grad_norm": 0.5278213620185852, "learning_rate": 2.843770114942529e-05, "loss": 0.1611, "step": 10653 }, { "epoch": 36.737931034482756, "grad_norm": 1.0948145389556885, "learning_rate": 2.8437241379310347e-05, "loss": 0.148, "step": 10654 }, { "epoch": 36.741379310344826, "grad_norm": 0.6101579070091248, "learning_rate": 2.8436781609195403e-05, "loss": 0.1502, "step": 10655 }, { "epoch": 36.744827586206895, "grad_norm": 0.4637543857097626, "learning_rate": 2.8436321839080458e-05, "loss": 0.1236, "step": 10656 }, { "epoch": 36.748275862068965, "grad_norm": 0.6627601385116577, "learning_rate": 2.843586206896552e-05, "loss": 0.1565, "step": 10657 }, { "epoch": 36.751724137931035, "grad_norm": 0.9417009949684143, "learning_rate": 2.8435402298850575e-05, "loss": 0.1533, "step": 10658 }, { "epoch": 36.755172413793105, "grad_norm": 1.6902151107788086, "learning_rate": 2.843494252873563e-05, "loss": 0.1595, "step": 10659 }, { "epoch": 36.758620689655174, "grad_norm": 0.6132351160049438, "learning_rate": 2.843448275862069e-05, "loss": 0.1383, "step": 10660 }, { "epoch": 36.762068965517244, "grad_norm": 2.0332047939300537, "learning_rate": 2.8434022988505748e-05, "loss": 0.1545, "step": 10661 }, { "epoch": 36.765517241379314, "grad_norm": 0.9808468818664551, "learning_rate": 2.8433563218390807e-05, "loss": 0.1462, "step": 10662 }, { "epoch": 36.76896551724138, "grad_norm": 1.0192492008209229, "learning_rate": 2.8433103448275862e-05, "loss": 0.1388, "step": 10663 }, { "epoch": 36.772413793103446, "grad_norm": 0.7594610452651978, "learning_rate": 2.8432643678160917e-05, "loss": 0.1551, "step": 10664 }, { "epoch": 36.775862068965516, "grad_norm": 2.445955514907837, "learning_rate": 2.843218390804598e-05, "loss": 0.1876, "step": 10665 }, { "epoch": 36.779310344827586, "grad_norm": 0.9526570439338684, "learning_rate": 2.8431724137931035e-05, "loss": 0.2474, "step": 10666 }, { "epoch": 36.782758620689656, "grad_norm": 0.6428340673446655, "learning_rate": 2.8431264367816093e-05, "loss": 0.2045, "step": 10667 }, { "epoch": 36.786206896551725, "grad_norm": 1.00084388256073, "learning_rate": 2.843080459770115e-05, "loss": 0.2094, "step": 10668 }, { "epoch": 36.789655172413795, "grad_norm": 1.0473942756652832, "learning_rate": 2.8430344827586207e-05, "loss": 0.1906, "step": 10669 }, { "epoch": 36.793103448275865, "grad_norm": 0.5683398842811584, "learning_rate": 2.8429885057471266e-05, "loss": 0.1898, "step": 10670 }, { "epoch": 36.796551724137935, "grad_norm": 0.749152660369873, "learning_rate": 2.842942528735632e-05, "loss": 0.1708, "step": 10671 }, { "epoch": 36.8, "grad_norm": 0.6771622896194458, "learning_rate": 2.842896551724138e-05, "loss": 0.1915, "step": 10672 }, { "epoch": 36.80344827586207, "grad_norm": 0.7948886156082153, "learning_rate": 2.842850574712644e-05, "loss": 0.189, "step": 10673 }, { "epoch": 36.80689655172414, "grad_norm": 0.6923906803131104, "learning_rate": 2.8428045977011494e-05, "loss": 0.1631, "step": 10674 }, { "epoch": 36.810344827586206, "grad_norm": 0.6718952059745789, "learning_rate": 2.8427586206896553e-05, "loss": 0.1748, "step": 10675 }, { "epoch": 36.813793103448276, "grad_norm": 0.7793512940406799, "learning_rate": 2.8427126436781608e-05, "loss": 0.1509, "step": 10676 }, { "epoch": 36.817241379310346, "grad_norm": 0.6178526878356934, "learning_rate": 2.842666666666667e-05, "loss": 0.1716, "step": 10677 }, { "epoch": 36.820689655172416, "grad_norm": 0.9441781044006348, "learning_rate": 2.8426206896551725e-05, "loss": 0.1657, "step": 10678 }, { "epoch": 36.824137931034485, "grad_norm": 0.7764535546302795, "learning_rate": 2.842574712643678e-05, "loss": 0.1795, "step": 10679 }, { "epoch": 36.827586206896555, "grad_norm": 0.6677026152610779, "learning_rate": 2.842528735632184e-05, "loss": 0.1634, "step": 10680 }, { "epoch": 36.83103448275862, "grad_norm": 1.0940403938293457, "learning_rate": 2.8424827586206898e-05, "loss": 0.1527, "step": 10681 }, { "epoch": 36.83448275862069, "grad_norm": 0.8840386867523193, "learning_rate": 2.8424367816091957e-05, "loss": 0.1373, "step": 10682 }, { "epoch": 36.83793103448276, "grad_norm": 0.7716155648231506, "learning_rate": 2.8423908045977012e-05, "loss": 0.1537, "step": 10683 }, { "epoch": 36.84137931034483, "grad_norm": 0.6814297437667847, "learning_rate": 2.8423448275862067e-05, "loss": 0.1692, "step": 10684 }, { "epoch": 36.8448275862069, "grad_norm": 0.7596985697746277, "learning_rate": 2.842298850574713e-05, "loss": 0.1412, "step": 10685 }, { "epoch": 36.84827586206897, "grad_norm": 0.9237216114997864, "learning_rate": 2.8422528735632185e-05, "loss": 0.1393, "step": 10686 }, { "epoch": 36.851724137931036, "grad_norm": 3.2781126499176025, "learning_rate": 2.8422068965517243e-05, "loss": 0.129, "step": 10687 }, { "epoch": 36.855172413793106, "grad_norm": 0.9052191376686096, "learning_rate": 2.84216091954023e-05, "loss": 0.1453, "step": 10688 }, { "epoch": 36.858620689655176, "grad_norm": 1.0917690992355347, "learning_rate": 2.8421149425287357e-05, "loss": 0.1589, "step": 10689 }, { "epoch": 36.86206896551724, "grad_norm": 1.2072266340255737, "learning_rate": 2.8420689655172416e-05, "loss": 0.2016, "step": 10690 }, { "epoch": 36.86551724137931, "grad_norm": 0.6516867280006409, "learning_rate": 2.842022988505747e-05, "loss": 0.2196, "step": 10691 }, { "epoch": 36.86896551724138, "grad_norm": 1.1524584293365479, "learning_rate": 2.8419770114942527e-05, "loss": 0.2034, "step": 10692 }, { "epoch": 36.87241379310345, "grad_norm": 0.5823729038238525, "learning_rate": 2.841931034482759e-05, "loss": 0.2084, "step": 10693 }, { "epoch": 36.87586206896552, "grad_norm": 1.477089285850525, "learning_rate": 2.8418850574712644e-05, "loss": 0.1995, "step": 10694 }, { "epoch": 36.87931034482759, "grad_norm": 0.7116855978965759, "learning_rate": 2.8418390804597703e-05, "loss": 0.1731, "step": 10695 }, { "epoch": 36.88275862068966, "grad_norm": 0.812875509262085, "learning_rate": 2.8417931034482758e-05, "loss": 0.2003, "step": 10696 }, { "epoch": 36.88620689655173, "grad_norm": 0.6580550670623779, "learning_rate": 2.8417471264367817e-05, "loss": 0.1877, "step": 10697 }, { "epoch": 36.889655172413796, "grad_norm": 1.0628372430801392, "learning_rate": 2.8417011494252875e-05, "loss": 0.1741, "step": 10698 }, { "epoch": 36.89310344827586, "grad_norm": 0.9237539172172546, "learning_rate": 2.841655172413793e-05, "loss": 0.1925, "step": 10699 }, { "epoch": 36.89655172413793, "grad_norm": 0.5300173759460449, "learning_rate": 2.841609195402299e-05, "loss": 0.157, "step": 10700 }, { "epoch": 36.9, "grad_norm": 1.0923961400985718, "learning_rate": 2.8415632183908048e-05, "loss": 0.1578, "step": 10701 }, { "epoch": 36.90344827586207, "grad_norm": 0.6719096302986145, "learning_rate": 2.8415172413793103e-05, "loss": 0.1786, "step": 10702 }, { "epoch": 36.90689655172414, "grad_norm": 0.8636154532432556, "learning_rate": 2.8414712643678162e-05, "loss": 0.188, "step": 10703 }, { "epoch": 36.91034482758621, "grad_norm": 3.3167741298675537, "learning_rate": 2.8414252873563217e-05, "loss": 0.1686, "step": 10704 }, { "epoch": 36.91379310344828, "grad_norm": 0.9230095744132996, "learning_rate": 2.841379310344828e-05, "loss": 0.1907, "step": 10705 }, { "epoch": 36.91724137931035, "grad_norm": 1.6048625707626343, "learning_rate": 2.8413333333333335e-05, "loss": 0.1589, "step": 10706 }, { "epoch": 36.92068965517242, "grad_norm": 0.9631613492965698, "learning_rate": 2.841287356321839e-05, "loss": 0.1364, "step": 10707 }, { "epoch": 36.92413793103448, "grad_norm": 0.7647831439971924, "learning_rate": 2.841241379310345e-05, "loss": 0.1627, "step": 10708 }, { "epoch": 36.92758620689655, "grad_norm": 1.0231801271438599, "learning_rate": 2.8411954022988507e-05, "loss": 0.1435, "step": 10709 }, { "epoch": 36.93103448275862, "grad_norm": 0.698493480682373, "learning_rate": 2.8411494252873566e-05, "loss": 0.1587, "step": 10710 }, { "epoch": 36.93448275862069, "grad_norm": 0.6284909844398499, "learning_rate": 2.841103448275862e-05, "loss": 0.1307, "step": 10711 }, { "epoch": 36.93793103448276, "grad_norm": 0.9702403545379639, "learning_rate": 2.8410574712643677e-05, "loss": 0.1525, "step": 10712 }, { "epoch": 36.94137931034483, "grad_norm": 1.6756486892700195, "learning_rate": 2.841011494252874e-05, "loss": 0.1395, "step": 10713 }, { "epoch": 36.9448275862069, "grad_norm": 0.9406589269638062, "learning_rate": 2.8409655172413794e-05, "loss": 0.1481, "step": 10714 }, { "epoch": 36.94827586206897, "grad_norm": 1.122393250465393, "learning_rate": 2.8409195402298853e-05, "loss": 0.2052, "step": 10715 }, { "epoch": 36.95172413793104, "grad_norm": 0.6146906614303589, "learning_rate": 2.8408735632183908e-05, "loss": 0.2212, "step": 10716 }, { "epoch": 36.9551724137931, "grad_norm": 1.4909731149673462, "learning_rate": 2.8408275862068967e-05, "loss": 0.2339, "step": 10717 }, { "epoch": 36.95862068965517, "grad_norm": 0.5516590476036072, "learning_rate": 2.8407816091954025e-05, "loss": 0.183, "step": 10718 }, { "epoch": 36.96206896551724, "grad_norm": 0.783720076084137, "learning_rate": 2.840735632183908e-05, "loss": 0.1728, "step": 10719 }, { "epoch": 36.96551724137931, "grad_norm": 0.7455956935882568, "learning_rate": 2.8406896551724136e-05, "loss": 0.1901, "step": 10720 }, { "epoch": 36.96896551724138, "grad_norm": 0.8754481077194214, "learning_rate": 2.8406436781609198e-05, "loss": 0.1656, "step": 10721 }, { "epoch": 36.97241379310345, "grad_norm": 0.7258318662643433, "learning_rate": 2.8405977011494253e-05, "loss": 0.1815, "step": 10722 }, { "epoch": 36.97586206896552, "grad_norm": 0.8053443431854248, "learning_rate": 2.8405517241379312e-05, "loss": 0.1467, "step": 10723 }, { "epoch": 36.97931034482759, "grad_norm": 0.9864717125892639, "learning_rate": 2.8405057471264367e-05, "loss": 0.1626, "step": 10724 }, { "epoch": 36.98275862068966, "grad_norm": 0.7825937271118164, "learning_rate": 2.8404597701149426e-05, "loss": 0.162, "step": 10725 }, { "epoch": 36.98620689655172, "grad_norm": 0.8081105947494507, "learning_rate": 2.8404137931034485e-05, "loss": 0.1557, "step": 10726 }, { "epoch": 36.98965517241379, "grad_norm": 0.7564564943313599, "learning_rate": 2.840367816091954e-05, "loss": 0.1509, "step": 10727 }, { "epoch": 36.99310344827586, "grad_norm": 0.7210036516189575, "learning_rate": 2.84032183908046e-05, "loss": 0.1566, "step": 10728 }, { "epoch": 36.99655172413793, "grad_norm": 1.1356292963027954, "learning_rate": 2.8402758620689657e-05, "loss": 0.1527, "step": 10729 }, { "epoch": 37.0, "grad_norm": 2.12211537361145, "learning_rate": 2.8402298850574712e-05, "loss": 0.1858, "step": 10730 }, { "epoch": 37.00344827586207, "grad_norm": 0.7541049718856812, "learning_rate": 2.840183908045977e-05, "loss": 0.2208, "step": 10731 }, { "epoch": 37.00689655172414, "grad_norm": 0.5402161478996277, "learning_rate": 2.8401379310344826e-05, "loss": 0.2185, "step": 10732 }, { "epoch": 37.01034482758621, "grad_norm": 1.5043203830718994, "learning_rate": 2.840091954022989e-05, "loss": 0.1709, "step": 10733 }, { "epoch": 37.01379310344828, "grad_norm": 0.767943799495697, "learning_rate": 2.8400459770114944e-05, "loss": 0.1676, "step": 10734 }, { "epoch": 37.01724137931034, "grad_norm": 0.725738525390625, "learning_rate": 2.84e-05, "loss": 0.1831, "step": 10735 }, { "epoch": 37.02068965517241, "grad_norm": 0.5816261768341064, "learning_rate": 2.8399540229885058e-05, "loss": 0.1798, "step": 10736 }, { "epoch": 37.02413793103448, "grad_norm": 0.5923587679862976, "learning_rate": 2.8399080459770117e-05, "loss": 0.1787, "step": 10737 }, { "epoch": 37.02758620689655, "grad_norm": 0.6582279205322266, "learning_rate": 2.8398620689655175e-05, "loss": 0.1639, "step": 10738 }, { "epoch": 37.03103448275862, "grad_norm": 0.6987491846084595, "learning_rate": 2.839816091954023e-05, "loss": 0.1538, "step": 10739 }, { "epoch": 37.03448275862069, "grad_norm": 0.9792788028717041, "learning_rate": 2.8397701149425286e-05, "loss": 0.1768, "step": 10740 }, { "epoch": 37.03793103448276, "grad_norm": 0.6915304660797119, "learning_rate": 2.8397241379310348e-05, "loss": 0.1484, "step": 10741 }, { "epoch": 37.04137931034483, "grad_norm": 0.49615979194641113, "learning_rate": 2.8396781609195403e-05, "loss": 0.1514, "step": 10742 }, { "epoch": 37.0448275862069, "grad_norm": 0.6410773992538452, "learning_rate": 2.8396321839080462e-05, "loss": 0.171, "step": 10743 }, { "epoch": 37.04827586206896, "grad_norm": 0.5026891827583313, "learning_rate": 2.8395862068965517e-05, "loss": 0.1448, "step": 10744 }, { "epoch": 37.05172413793103, "grad_norm": 0.8851748704910278, "learning_rate": 2.8395402298850576e-05, "loss": 0.1308, "step": 10745 }, { "epoch": 37.0551724137931, "grad_norm": 0.9697953462600708, "learning_rate": 2.8394942528735634e-05, "loss": 0.1515, "step": 10746 }, { "epoch": 37.05862068965517, "grad_norm": 1.2457294464111328, "learning_rate": 2.839448275862069e-05, "loss": 0.1352, "step": 10747 }, { "epoch": 37.06206896551724, "grad_norm": 0.605923593044281, "learning_rate": 2.8394022988505745e-05, "loss": 0.1411, "step": 10748 }, { "epoch": 37.06551724137931, "grad_norm": 0.8519453406333923, "learning_rate": 2.8393563218390807e-05, "loss": 0.135, "step": 10749 }, { "epoch": 37.06896551724138, "grad_norm": 1.1566009521484375, "learning_rate": 2.8393103448275862e-05, "loss": 0.1346, "step": 10750 }, { "epoch": 37.07241379310345, "grad_norm": 1.0357335805892944, "learning_rate": 2.839264367816092e-05, "loss": 0.148, "step": 10751 }, { "epoch": 37.07586206896552, "grad_norm": 0.5856931805610657, "learning_rate": 2.8392183908045976e-05, "loss": 0.1227, "step": 10752 }, { "epoch": 37.07931034482758, "grad_norm": 0.8286438584327698, "learning_rate": 2.8391724137931035e-05, "loss": 0.1348, "step": 10753 }, { "epoch": 37.08275862068965, "grad_norm": 0.9555977582931519, "learning_rate": 2.8391264367816094e-05, "loss": 0.138, "step": 10754 }, { "epoch": 37.08620689655172, "grad_norm": 2.9049391746520996, "learning_rate": 2.839080459770115e-05, "loss": 0.1763, "step": 10755 }, { "epoch": 37.08965517241379, "grad_norm": 1.362980842590332, "learning_rate": 2.8390344827586208e-05, "loss": 0.2029, "step": 10756 }, { "epoch": 37.09310344827586, "grad_norm": 0.6285291910171509, "learning_rate": 2.8389885057471266e-05, "loss": 0.1905, "step": 10757 }, { "epoch": 37.09655172413793, "grad_norm": 0.8561713695526123, "learning_rate": 2.8389425287356322e-05, "loss": 0.1945, "step": 10758 }, { "epoch": 37.1, "grad_norm": 0.8591920137405396, "learning_rate": 2.838896551724138e-05, "loss": 0.1904, "step": 10759 }, { "epoch": 37.10344827586207, "grad_norm": 0.5554085969924927, "learning_rate": 2.8388505747126436e-05, "loss": 0.2017, "step": 10760 }, { "epoch": 37.10689655172414, "grad_norm": 0.9448529481887817, "learning_rate": 2.8388045977011498e-05, "loss": 0.1663, "step": 10761 }, { "epoch": 37.110344827586204, "grad_norm": 0.7957330346107483, "learning_rate": 2.8387586206896553e-05, "loss": 0.1651, "step": 10762 }, { "epoch": 37.11379310344827, "grad_norm": 0.5791476964950562, "learning_rate": 2.838712643678161e-05, "loss": 0.1631, "step": 10763 }, { "epoch": 37.11724137931034, "grad_norm": 1.0163462162017822, "learning_rate": 2.8386666666666667e-05, "loss": 0.1659, "step": 10764 }, { "epoch": 37.12068965517241, "grad_norm": 1.1884227991104126, "learning_rate": 2.8386206896551726e-05, "loss": 0.1722, "step": 10765 }, { "epoch": 37.12413793103448, "grad_norm": 0.6064678430557251, "learning_rate": 2.8385747126436784e-05, "loss": 0.1445, "step": 10766 }, { "epoch": 37.12758620689655, "grad_norm": 0.9491970539093018, "learning_rate": 2.838528735632184e-05, "loss": 0.1756, "step": 10767 }, { "epoch": 37.13103448275862, "grad_norm": 0.7294265031814575, "learning_rate": 2.8384827586206895e-05, "loss": 0.1433, "step": 10768 }, { "epoch": 37.13448275862069, "grad_norm": 0.6266220808029175, "learning_rate": 2.8384367816091957e-05, "loss": 0.1463, "step": 10769 }, { "epoch": 37.13793103448276, "grad_norm": 1.3589081764221191, "learning_rate": 2.8383908045977012e-05, "loss": 0.1451, "step": 10770 }, { "epoch": 37.141379310344824, "grad_norm": 0.6135518550872803, "learning_rate": 2.838344827586207e-05, "loss": 0.151, "step": 10771 }, { "epoch": 37.144827586206894, "grad_norm": 1.5103486776351929, "learning_rate": 2.8382988505747126e-05, "loss": 0.1262, "step": 10772 }, { "epoch": 37.148275862068964, "grad_norm": 0.702298104763031, "learning_rate": 2.8382528735632185e-05, "loss": 0.1414, "step": 10773 }, { "epoch": 37.15172413793103, "grad_norm": 0.7136591076850891, "learning_rate": 2.8382068965517244e-05, "loss": 0.1349, "step": 10774 }, { "epoch": 37.1551724137931, "grad_norm": 0.8652254939079285, "learning_rate": 2.83816091954023e-05, "loss": 0.1256, "step": 10775 }, { "epoch": 37.15862068965517, "grad_norm": 1.4719703197479248, "learning_rate": 2.8381149425287358e-05, "loss": 0.1213, "step": 10776 }, { "epoch": 37.16206896551724, "grad_norm": 0.6411932110786438, "learning_rate": 2.8380689655172416e-05, "loss": 0.1199, "step": 10777 }, { "epoch": 37.16551724137931, "grad_norm": 0.838486909866333, "learning_rate": 2.838022988505747e-05, "loss": 0.1405, "step": 10778 }, { "epoch": 37.16896551724138, "grad_norm": 0.8402099013328552, "learning_rate": 2.837977011494253e-05, "loss": 0.1287, "step": 10779 }, { "epoch": 37.172413793103445, "grad_norm": 1.247376561164856, "learning_rate": 2.8379310344827586e-05, "loss": 0.1467, "step": 10780 }, { "epoch": 37.175862068965515, "grad_norm": 0.7271004319190979, "learning_rate": 2.8378850574712644e-05, "loss": 0.259, "step": 10781 }, { "epoch": 37.179310344827584, "grad_norm": 0.7793028950691223, "learning_rate": 2.8378390804597703e-05, "loss": 0.1915, "step": 10782 }, { "epoch": 37.182758620689654, "grad_norm": 1.0284976959228516, "learning_rate": 2.837793103448276e-05, "loss": 0.1894, "step": 10783 }, { "epoch": 37.186206896551724, "grad_norm": 0.8512124419212341, "learning_rate": 2.8377471264367817e-05, "loss": 0.1653, "step": 10784 }, { "epoch": 37.189655172413794, "grad_norm": 0.7709231376647949, "learning_rate": 2.8377011494252876e-05, "loss": 0.1866, "step": 10785 }, { "epoch": 37.19310344827586, "grad_norm": 0.6200013160705566, "learning_rate": 2.837655172413793e-05, "loss": 0.1683, "step": 10786 }, { "epoch": 37.19655172413793, "grad_norm": 0.7287511229515076, "learning_rate": 2.837609195402299e-05, "loss": 0.1917, "step": 10787 }, { "epoch": 37.2, "grad_norm": 0.6345940232276917, "learning_rate": 2.8375632183908045e-05, "loss": 0.1711, "step": 10788 }, { "epoch": 37.203448275862065, "grad_norm": 1.0029666423797607, "learning_rate": 2.8375172413793107e-05, "loss": 0.1653, "step": 10789 }, { "epoch": 37.206896551724135, "grad_norm": 0.5866592526435852, "learning_rate": 2.8374712643678162e-05, "loss": 0.1555, "step": 10790 }, { "epoch": 37.210344827586205, "grad_norm": 0.715246856212616, "learning_rate": 2.8374252873563218e-05, "loss": 0.1807, "step": 10791 }, { "epoch": 37.213793103448275, "grad_norm": 0.8988139033317566, "learning_rate": 2.8373793103448276e-05, "loss": 0.1602, "step": 10792 }, { "epoch": 37.217241379310344, "grad_norm": 0.8318347334861755, "learning_rate": 2.8373333333333335e-05, "loss": 0.1564, "step": 10793 }, { "epoch": 37.220689655172414, "grad_norm": 1.2948133945465088, "learning_rate": 2.8372873563218394e-05, "loss": 0.1516, "step": 10794 }, { "epoch": 37.224137931034484, "grad_norm": 1.1699612140655518, "learning_rate": 2.837241379310345e-05, "loss": 0.189, "step": 10795 }, { "epoch": 37.227586206896554, "grad_norm": 0.7963473796844482, "learning_rate": 2.8371954022988504e-05, "loss": 0.1606, "step": 10796 }, { "epoch": 37.23103448275862, "grad_norm": 0.627083420753479, "learning_rate": 2.8371494252873566e-05, "loss": 0.1292, "step": 10797 }, { "epoch": 37.234482758620686, "grad_norm": 0.7309457063674927, "learning_rate": 2.837103448275862e-05, "loss": 0.1601, "step": 10798 }, { "epoch": 37.237931034482756, "grad_norm": 0.9074265956878662, "learning_rate": 2.837057471264368e-05, "loss": 0.1474, "step": 10799 }, { "epoch": 37.241379310344826, "grad_norm": 0.6212066411972046, "learning_rate": 2.8370114942528736e-05, "loss": 0.1407, "step": 10800 }, { "epoch": 37.244827586206895, "grad_norm": 0.7132729291915894, "learning_rate": 2.8369655172413794e-05, "loss": 0.137, "step": 10801 }, { "epoch": 37.248275862068965, "grad_norm": 0.6174318790435791, "learning_rate": 2.8369195402298853e-05, "loss": 0.1215, "step": 10802 }, { "epoch": 37.251724137931035, "grad_norm": 2.13437819480896, "learning_rate": 2.8368735632183908e-05, "loss": 0.1173, "step": 10803 }, { "epoch": 37.255172413793105, "grad_norm": 1.1946524381637573, "learning_rate": 2.8368275862068967e-05, "loss": 0.1475, "step": 10804 }, { "epoch": 37.258620689655174, "grad_norm": 1.019831657409668, "learning_rate": 2.8367816091954026e-05, "loss": 0.1697, "step": 10805 }, { "epoch": 37.262068965517244, "grad_norm": 0.756076991558075, "learning_rate": 2.836735632183908e-05, "loss": 0.2271, "step": 10806 }, { "epoch": 37.265517241379314, "grad_norm": 0.7528068423271179, "learning_rate": 2.836689655172414e-05, "loss": 0.1991, "step": 10807 }, { "epoch": 37.26896551724138, "grad_norm": 0.5740979909896851, "learning_rate": 2.8366436781609195e-05, "loss": 0.192, "step": 10808 }, { "epoch": 37.272413793103446, "grad_norm": 0.5245842337608337, "learning_rate": 2.8365977011494254e-05, "loss": 0.1696, "step": 10809 }, { "epoch": 37.275862068965516, "grad_norm": 1.051989197731018, "learning_rate": 2.8365517241379312e-05, "loss": 0.1903, "step": 10810 }, { "epoch": 37.279310344827586, "grad_norm": 0.6176708340644836, "learning_rate": 2.8365057471264368e-05, "loss": 0.1822, "step": 10811 }, { "epoch": 37.282758620689656, "grad_norm": 0.6884135007858276, "learning_rate": 2.8364597701149426e-05, "loss": 0.1686, "step": 10812 }, { "epoch": 37.286206896551725, "grad_norm": 0.8878259062767029, "learning_rate": 2.8364137931034485e-05, "loss": 0.1621, "step": 10813 }, { "epoch": 37.289655172413795, "grad_norm": 0.6416940093040466, "learning_rate": 2.836367816091954e-05, "loss": 0.1659, "step": 10814 }, { "epoch": 37.293103448275865, "grad_norm": 0.5948601961135864, "learning_rate": 2.83632183908046e-05, "loss": 0.1501, "step": 10815 }, { "epoch": 37.296551724137935, "grad_norm": 0.7837961912155151, "learning_rate": 2.8362758620689654e-05, "loss": 0.1451, "step": 10816 }, { "epoch": 37.3, "grad_norm": 0.6346577405929565, "learning_rate": 2.8362298850574716e-05, "loss": 0.1413, "step": 10817 }, { "epoch": 37.30344827586207, "grad_norm": 3.471308946609497, "learning_rate": 2.836183908045977e-05, "loss": 0.1446, "step": 10818 }, { "epoch": 37.30689655172414, "grad_norm": 0.8195681571960449, "learning_rate": 2.8361379310344827e-05, "loss": 0.1577, "step": 10819 }, { "epoch": 37.310344827586206, "grad_norm": 0.5924891829490662, "learning_rate": 2.8360919540229886e-05, "loss": 0.1799, "step": 10820 }, { "epoch": 37.313793103448276, "grad_norm": 0.7653655409812927, "learning_rate": 2.8360459770114944e-05, "loss": 0.1279, "step": 10821 }, { "epoch": 37.317241379310346, "grad_norm": 0.6101846694946289, "learning_rate": 2.8360000000000003e-05, "loss": 0.1268, "step": 10822 }, { "epoch": 37.320689655172416, "grad_norm": 0.7302181720733643, "learning_rate": 2.8359540229885058e-05, "loss": 0.1494, "step": 10823 }, { "epoch": 37.324137931034485, "grad_norm": 0.7034152150154114, "learning_rate": 2.8359080459770113e-05, "loss": 0.1653, "step": 10824 }, { "epoch": 37.327586206896555, "grad_norm": 0.7085485458374023, "learning_rate": 2.8358620689655176e-05, "loss": 0.1481, "step": 10825 }, { "epoch": 37.33103448275862, "grad_norm": 0.8181660771369934, "learning_rate": 2.835816091954023e-05, "loss": 0.1313, "step": 10826 }, { "epoch": 37.33448275862069, "grad_norm": 0.8919395804405212, "learning_rate": 2.835770114942529e-05, "loss": 0.1455, "step": 10827 }, { "epoch": 37.33793103448276, "grad_norm": 1.0895432233810425, "learning_rate": 2.8357241379310345e-05, "loss": 0.125, "step": 10828 }, { "epoch": 37.34137931034483, "grad_norm": 0.9261983633041382, "learning_rate": 2.8356781609195404e-05, "loss": 0.1435, "step": 10829 }, { "epoch": 37.3448275862069, "grad_norm": 1.54006826877594, "learning_rate": 2.8356321839080462e-05, "loss": 0.2024, "step": 10830 }, { "epoch": 37.34827586206897, "grad_norm": 1.0665701627731323, "learning_rate": 2.8355862068965518e-05, "loss": 0.25, "step": 10831 }, { "epoch": 37.351724137931036, "grad_norm": 0.6904036402702332, "learning_rate": 2.8355402298850576e-05, "loss": 0.2084, "step": 10832 }, { "epoch": 37.355172413793106, "grad_norm": 0.5818563103675842, "learning_rate": 2.8354942528735635e-05, "loss": 0.1965, "step": 10833 }, { "epoch": 37.358620689655176, "grad_norm": 0.6645038723945618, "learning_rate": 2.835448275862069e-05, "loss": 0.1774, "step": 10834 }, { "epoch": 37.36206896551724, "grad_norm": 0.7504651546478271, "learning_rate": 2.835402298850575e-05, "loss": 0.1786, "step": 10835 }, { "epoch": 37.36551724137931, "grad_norm": 0.7709094882011414, "learning_rate": 2.8353563218390804e-05, "loss": 0.1931, "step": 10836 }, { "epoch": 37.36896551724138, "grad_norm": 0.5546725988388062, "learning_rate": 2.8353103448275863e-05, "loss": 0.1718, "step": 10837 }, { "epoch": 37.37241379310345, "grad_norm": 1.1659233570098877, "learning_rate": 2.835264367816092e-05, "loss": 0.1559, "step": 10838 }, { "epoch": 37.37586206896552, "grad_norm": 1.1990723609924316, "learning_rate": 2.8352183908045977e-05, "loss": 0.1489, "step": 10839 }, { "epoch": 37.37931034482759, "grad_norm": 1.1428561210632324, "learning_rate": 2.8351724137931035e-05, "loss": 0.1655, "step": 10840 }, { "epoch": 37.38275862068966, "grad_norm": 0.6184622049331665, "learning_rate": 2.8351264367816094e-05, "loss": 0.1623, "step": 10841 }, { "epoch": 37.38620689655173, "grad_norm": 0.6130128502845764, "learning_rate": 2.835080459770115e-05, "loss": 0.1566, "step": 10842 }, { "epoch": 37.389655172413796, "grad_norm": 1.302697777748108, "learning_rate": 2.8350344827586208e-05, "loss": 0.1596, "step": 10843 }, { "epoch": 37.39310344827586, "grad_norm": 0.6653267741203308, "learning_rate": 2.8349885057471263e-05, "loss": 0.1618, "step": 10844 }, { "epoch": 37.39655172413793, "grad_norm": 1.1052366495132446, "learning_rate": 2.8349425287356326e-05, "loss": 0.1489, "step": 10845 }, { "epoch": 37.4, "grad_norm": 0.7733346223831177, "learning_rate": 2.834896551724138e-05, "loss": 0.1393, "step": 10846 }, { "epoch": 37.40344827586207, "grad_norm": 0.8999082446098328, "learning_rate": 2.8348505747126436e-05, "loss": 0.1474, "step": 10847 }, { "epoch": 37.40689655172414, "grad_norm": 0.7027616500854492, "learning_rate": 2.8348045977011495e-05, "loss": 0.1535, "step": 10848 }, { "epoch": 37.41034482758621, "grad_norm": 0.6495761275291443, "learning_rate": 2.834758620689655e-05, "loss": 0.1443, "step": 10849 }, { "epoch": 37.41379310344828, "grad_norm": 0.9473036527633667, "learning_rate": 2.8347126436781612e-05, "loss": 0.1322, "step": 10850 }, { "epoch": 37.41724137931035, "grad_norm": 0.7176120281219482, "learning_rate": 2.8346666666666667e-05, "loss": 0.1192, "step": 10851 }, { "epoch": 37.42068965517242, "grad_norm": 1.4056499004364014, "learning_rate": 2.8346206896551723e-05, "loss": 0.1241, "step": 10852 }, { "epoch": 37.42413793103448, "grad_norm": 1.161744236946106, "learning_rate": 2.834574712643678e-05, "loss": 0.1292, "step": 10853 }, { "epoch": 37.42758620689655, "grad_norm": 0.611477792263031, "learning_rate": 2.834528735632184e-05, "loss": 0.1247, "step": 10854 }, { "epoch": 37.43103448275862, "grad_norm": 1.672537922859192, "learning_rate": 2.83448275862069e-05, "loss": 0.165, "step": 10855 }, { "epoch": 37.43448275862069, "grad_norm": 1.2588893175125122, "learning_rate": 2.8344367816091954e-05, "loss": 0.2763, "step": 10856 }, { "epoch": 37.43793103448276, "grad_norm": 0.7507724761962891, "learning_rate": 2.834390804597701e-05, "loss": 0.1959, "step": 10857 }, { "epoch": 37.44137931034483, "grad_norm": 0.5302934646606445, "learning_rate": 2.834344827586207e-05, "loss": 0.2143, "step": 10858 }, { "epoch": 37.4448275862069, "grad_norm": 1.0973082780838013, "learning_rate": 2.8342988505747127e-05, "loss": 0.1879, "step": 10859 }, { "epoch": 37.44827586206897, "grad_norm": 0.8813832402229309, "learning_rate": 2.8342528735632185e-05, "loss": 0.1978, "step": 10860 }, { "epoch": 37.45172413793104, "grad_norm": 0.8036226630210876, "learning_rate": 2.834206896551724e-05, "loss": 0.1762, "step": 10861 }, { "epoch": 37.4551724137931, "grad_norm": 0.7973728775978088, "learning_rate": 2.83416091954023e-05, "loss": 0.1934, "step": 10862 }, { "epoch": 37.45862068965517, "grad_norm": 0.9637789726257324, "learning_rate": 2.8341149425287358e-05, "loss": 0.1929, "step": 10863 }, { "epoch": 37.46206896551724, "grad_norm": 0.6340723633766174, "learning_rate": 2.8340689655172413e-05, "loss": 0.1781, "step": 10864 }, { "epoch": 37.46551724137931, "grad_norm": 0.47551804780960083, "learning_rate": 2.834022988505747e-05, "loss": 0.1577, "step": 10865 }, { "epoch": 37.46896551724138, "grad_norm": 1.402362585067749, "learning_rate": 2.833977011494253e-05, "loss": 0.1574, "step": 10866 }, { "epoch": 37.47241379310345, "grad_norm": 0.6964244842529297, "learning_rate": 2.8339310344827586e-05, "loss": 0.1617, "step": 10867 }, { "epoch": 37.47586206896552, "grad_norm": 0.6675231456756592, "learning_rate": 2.8338850574712645e-05, "loss": 0.1655, "step": 10868 }, { "epoch": 37.47931034482759, "grad_norm": 1.0918116569519043, "learning_rate": 2.83383908045977e-05, "loss": 0.1567, "step": 10869 }, { "epoch": 37.48275862068966, "grad_norm": 1.1589066982269287, "learning_rate": 2.833793103448276e-05, "loss": 0.1713, "step": 10870 }, { "epoch": 37.48620689655172, "grad_norm": 0.9679512977600098, "learning_rate": 2.8337471264367817e-05, "loss": 0.1536, "step": 10871 }, { "epoch": 37.48965517241379, "grad_norm": 0.794094443321228, "learning_rate": 2.8337011494252873e-05, "loss": 0.1523, "step": 10872 }, { "epoch": 37.49310344827586, "grad_norm": 1.3505103588104248, "learning_rate": 2.833655172413793e-05, "loss": 0.1458, "step": 10873 }, { "epoch": 37.49655172413793, "grad_norm": 0.5682282447814941, "learning_rate": 2.833609195402299e-05, "loss": 0.1334, "step": 10874 }, { "epoch": 37.5, "grad_norm": 0.6101197600364685, "learning_rate": 2.8335632183908045e-05, "loss": 0.1409, "step": 10875 }, { "epoch": 37.50344827586207, "grad_norm": 1.2378404140472412, "learning_rate": 2.8335172413793104e-05, "loss": 0.1504, "step": 10876 }, { "epoch": 37.50689655172414, "grad_norm": 1.0103638172149658, "learning_rate": 2.833471264367816e-05, "loss": 0.1245, "step": 10877 }, { "epoch": 37.51034482758621, "grad_norm": 0.7554375529289246, "learning_rate": 2.833425287356322e-05, "loss": 0.1131, "step": 10878 }, { "epoch": 37.51379310344828, "grad_norm": 0.8565630912780762, "learning_rate": 2.8333793103448277e-05, "loss": 0.126, "step": 10879 }, { "epoch": 37.51724137931034, "grad_norm": 1.1093565225601196, "learning_rate": 2.8333333333333332e-05, "loss": 0.2198, "step": 10880 }, { "epoch": 37.52068965517241, "grad_norm": 0.7986505031585693, "learning_rate": 2.833287356321839e-05, "loss": 0.2385, "step": 10881 }, { "epoch": 37.52413793103448, "grad_norm": 0.683953583240509, "learning_rate": 2.833241379310345e-05, "loss": 0.174, "step": 10882 }, { "epoch": 37.52758620689655, "grad_norm": 0.7558764219284058, "learning_rate": 2.8331954022988508e-05, "loss": 0.1857, "step": 10883 }, { "epoch": 37.53103448275862, "grad_norm": 0.5582343935966492, "learning_rate": 2.8331494252873563e-05, "loss": 0.1885, "step": 10884 }, { "epoch": 37.53448275862069, "grad_norm": 0.7371267080307007, "learning_rate": 2.833103448275862e-05, "loss": 0.1978, "step": 10885 }, { "epoch": 37.53793103448276, "grad_norm": 0.8880844712257385, "learning_rate": 2.833057471264368e-05, "loss": 0.186, "step": 10886 }, { "epoch": 37.54137931034483, "grad_norm": 0.7030354738235474, "learning_rate": 2.8330114942528736e-05, "loss": 0.1672, "step": 10887 }, { "epoch": 37.5448275862069, "grad_norm": 0.8933027386665344, "learning_rate": 2.8329655172413795e-05, "loss": 0.2053, "step": 10888 }, { "epoch": 37.54827586206896, "grad_norm": 0.6631677150726318, "learning_rate": 2.832919540229885e-05, "loss": 0.157, "step": 10889 }, { "epoch": 37.55172413793103, "grad_norm": 0.720568060874939, "learning_rate": 2.832873563218391e-05, "loss": 0.1705, "step": 10890 }, { "epoch": 37.5551724137931, "grad_norm": 1.3516345024108887, "learning_rate": 2.8328275862068967e-05, "loss": 0.1622, "step": 10891 }, { "epoch": 37.55862068965517, "grad_norm": 0.9326549172401428, "learning_rate": 2.8327816091954023e-05, "loss": 0.1682, "step": 10892 }, { "epoch": 37.56206896551724, "grad_norm": 0.6900548338890076, "learning_rate": 2.832735632183908e-05, "loss": 0.16, "step": 10893 }, { "epoch": 37.56551724137931, "grad_norm": 0.5067406892776489, "learning_rate": 2.832689655172414e-05, "loss": 0.1631, "step": 10894 }, { "epoch": 37.56896551724138, "grad_norm": 0.6998564600944519, "learning_rate": 2.8326436781609195e-05, "loss": 0.1673, "step": 10895 }, { "epoch": 37.57241379310345, "grad_norm": 0.7242847084999084, "learning_rate": 2.8325977011494254e-05, "loss": 0.1539, "step": 10896 }, { "epoch": 37.57586206896552, "grad_norm": 1.3596205711364746, "learning_rate": 2.832551724137931e-05, "loss": 0.1399, "step": 10897 }, { "epoch": 37.57931034482758, "grad_norm": 0.6195371747016907, "learning_rate": 2.8325057471264368e-05, "loss": 0.1341, "step": 10898 }, { "epoch": 37.58275862068965, "grad_norm": 0.6871008276939392, "learning_rate": 2.8324597701149427e-05, "loss": 0.1214, "step": 10899 }, { "epoch": 37.58620689655172, "grad_norm": 1.94680655002594, "learning_rate": 2.8324137931034482e-05, "loss": 0.1318, "step": 10900 }, { "epoch": 37.58965517241379, "grad_norm": 1.4512197971343994, "learning_rate": 2.832367816091954e-05, "loss": 0.1593, "step": 10901 }, { "epoch": 37.59310344827586, "grad_norm": 0.7419567108154297, "learning_rate": 2.83232183908046e-05, "loss": 0.1581, "step": 10902 }, { "epoch": 37.59655172413793, "grad_norm": 1.4841864109039307, "learning_rate": 2.8322758620689655e-05, "loss": 0.1259, "step": 10903 }, { "epoch": 37.6, "grad_norm": 1.4851298332214355, "learning_rate": 2.8322298850574713e-05, "loss": 0.1401, "step": 10904 }, { "epoch": 37.60344827586207, "grad_norm": 2.1920464038848877, "learning_rate": 2.832183908045977e-05, "loss": 0.1624, "step": 10905 }, { "epoch": 37.60689655172414, "grad_norm": 0.5137519240379333, "learning_rate": 2.832137931034483e-05, "loss": 0.2522, "step": 10906 }, { "epoch": 37.610344827586204, "grad_norm": 0.7236684560775757, "learning_rate": 2.8320919540229886e-05, "loss": 0.2063, "step": 10907 }, { "epoch": 37.61379310344827, "grad_norm": 0.524254560470581, "learning_rate": 2.832045977011494e-05, "loss": 0.2119, "step": 10908 }, { "epoch": 37.61724137931034, "grad_norm": 0.869421124458313, "learning_rate": 2.832e-05, "loss": 0.1947, "step": 10909 }, { "epoch": 37.62068965517241, "grad_norm": 0.6238872408866882, "learning_rate": 2.831954022988506e-05, "loss": 0.1695, "step": 10910 }, { "epoch": 37.62413793103448, "grad_norm": 0.4922725558280945, "learning_rate": 2.8319080459770117e-05, "loss": 0.1631, "step": 10911 }, { "epoch": 37.62758620689655, "grad_norm": 1.470910906791687, "learning_rate": 2.8318620689655173e-05, "loss": 0.1716, "step": 10912 }, { "epoch": 37.63103448275862, "grad_norm": 0.7542740702629089, "learning_rate": 2.8318160919540228e-05, "loss": 0.1703, "step": 10913 }, { "epoch": 37.63448275862069, "grad_norm": 0.6379086375236511, "learning_rate": 2.831770114942529e-05, "loss": 0.1583, "step": 10914 }, { "epoch": 37.63793103448276, "grad_norm": 1.4553357362747192, "learning_rate": 2.8317241379310345e-05, "loss": 0.1577, "step": 10915 }, { "epoch": 37.641379310344824, "grad_norm": 0.672191858291626, "learning_rate": 2.8316781609195404e-05, "loss": 0.1668, "step": 10916 }, { "epoch": 37.644827586206894, "grad_norm": 0.5237268805503845, "learning_rate": 2.831632183908046e-05, "loss": 0.1675, "step": 10917 }, { "epoch": 37.648275862068964, "grad_norm": 0.6044228076934814, "learning_rate": 2.8315862068965518e-05, "loss": 0.1529, "step": 10918 }, { "epoch": 37.65172413793103, "grad_norm": 0.5725052356719971, "learning_rate": 2.8315402298850577e-05, "loss": 0.1656, "step": 10919 }, { "epoch": 37.6551724137931, "grad_norm": 0.8162844777107239, "learning_rate": 2.8314942528735632e-05, "loss": 0.1744, "step": 10920 }, { "epoch": 37.65862068965517, "grad_norm": 0.6074329018592834, "learning_rate": 2.831448275862069e-05, "loss": 0.1378, "step": 10921 }, { "epoch": 37.66206896551724, "grad_norm": 0.6308432221412659, "learning_rate": 2.831402298850575e-05, "loss": 0.1593, "step": 10922 }, { "epoch": 37.66551724137931, "grad_norm": 0.6315426230430603, "learning_rate": 2.8313563218390805e-05, "loss": 0.1564, "step": 10923 }, { "epoch": 37.66896551724138, "grad_norm": 0.6384207010269165, "learning_rate": 2.8313103448275863e-05, "loss": 0.1325, "step": 10924 }, { "epoch": 37.672413793103445, "grad_norm": 0.6017013192176819, "learning_rate": 2.831264367816092e-05, "loss": 0.1263, "step": 10925 }, { "epoch": 37.675862068965515, "grad_norm": 0.9407339096069336, "learning_rate": 2.8312183908045977e-05, "loss": 0.1376, "step": 10926 }, { "epoch": 37.679310344827584, "grad_norm": 0.6754845976829529, "learning_rate": 2.8311724137931036e-05, "loss": 0.1248, "step": 10927 }, { "epoch": 37.682758620689654, "grad_norm": 0.9203807711601257, "learning_rate": 2.831126436781609e-05, "loss": 0.1267, "step": 10928 }, { "epoch": 37.686206896551724, "grad_norm": 0.6475149989128113, "learning_rate": 2.831080459770115e-05, "loss": 0.1268, "step": 10929 }, { "epoch": 37.689655172413794, "grad_norm": 1.1859956979751587, "learning_rate": 2.831034482758621e-05, "loss": 0.1634, "step": 10930 }, { "epoch": 37.69310344827586, "grad_norm": 0.9528568983078003, "learning_rate": 2.8309885057471264e-05, "loss": 0.219, "step": 10931 }, { "epoch": 37.69655172413793, "grad_norm": 0.5126023888587952, "learning_rate": 2.8309425287356323e-05, "loss": 0.2003, "step": 10932 }, { "epoch": 37.7, "grad_norm": 0.7427054643630981, "learning_rate": 2.8308965517241378e-05, "loss": 0.1746, "step": 10933 }, { "epoch": 37.703448275862065, "grad_norm": 0.45747289061546326, "learning_rate": 2.830850574712644e-05, "loss": 0.1844, "step": 10934 }, { "epoch": 37.706896551724135, "grad_norm": 0.4675343632698059, "learning_rate": 2.8308045977011495e-05, "loss": 0.176, "step": 10935 }, { "epoch": 37.710344827586205, "grad_norm": 0.5288020968437195, "learning_rate": 2.830758620689655e-05, "loss": 0.1548, "step": 10936 }, { "epoch": 37.713793103448275, "grad_norm": 0.7215676307678223, "learning_rate": 2.830712643678161e-05, "loss": 0.1771, "step": 10937 }, { "epoch": 37.717241379310344, "grad_norm": 0.8963896036148071, "learning_rate": 2.8306666666666668e-05, "loss": 0.1923, "step": 10938 }, { "epoch": 37.720689655172414, "grad_norm": 0.8587925434112549, "learning_rate": 2.8306206896551727e-05, "loss": 0.1868, "step": 10939 }, { "epoch": 37.724137931034484, "grad_norm": 0.5643771886825562, "learning_rate": 2.8305747126436782e-05, "loss": 0.1797, "step": 10940 }, { "epoch": 37.727586206896554, "grad_norm": 0.692358672618866, "learning_rate": 2.8305287356321837e-05, "loss": 0.1682, "step": 10941 }, { "epoch": 37.73103448275862, "grad_norm": 1.644950032234192, "learning_rate": 2.83048275862069e-05, "loss": 0.1993, "step": 10942 }, { "epoch": 37.734482758620686, "grad_norm": 0.6610879302024841, "learning_rate": 2.8304367816091954e-05, "loss": 0.1549, "step": 10943 }, { "epoch": 37.737931034482756, "grad_norm": 0.4906373620033264, "learning_rate": 2.8303908045977013e-05, "loss": 0.1616, "step": 10944 }, { "epoch": 37.741379310344826, "grad_norm": 0.6211592555046082, "learning_rate": 2.830344827586207e-05, "loss": 0.1636, "step": 10945 }, { "epoch": 37.744827586206895, "grad_norm": 0.6284118294715881, "learning_rate": 2.8302988505747127e-05, "loss": 0.1187, "step": 10946 }, { "epoch": 37.748275862068965, "grad_norm": 0.624556303024292, "learning_rate": 2.8302528735632186e-05, "loss": 0.1545, "step": 10947 }, { "epoch": 37.751724137931035, "grad_norm": 0.6253766417503357, "learning_rate": 2.830206896551724e-05, "loss": 0.1376, "step": 10948 }, { "epoch": 37.755172413793105, "grad_norm": 0.5626201033592224, "learning_rate": 2.83016091954023e-05, "loss": 0.1366, "step": 10949 }, { "epoch": 37.758620689655174, "grad_norm": 0.5595510005950928, "learning_rate": 2.830114942528736e-05, "loss": 0.1245, "step": 10950 }, { "epoch": 37.762068965517244, "grad_norm": 0.651328980922699, "learning_rate": 2.8300689655172414e-05, "loss": 0.1307, "step": 10951 }, { "epoch": 37.765517241379314, "grad_norm": 1.2826260328292847, "learning_rate": 2.8300229885057472e-05, "loss": 0.106, "step": 10952 }, { "epoch": 37.76896551724138, "grad_norm": 0.8153067827224731, "learning_rate": 2.8299770114942528e-05, "loss": 0.1418, "step": 10953 }, { "epoch": 37.772413793103446, "grad_norm": 1.5484859943389893, "learning_rate": 2.8299310344827586e-05, "loss": 0.1386, "step": 10954 }, { "epoch": 37.775862068965516, "grad_norm": 2.7210636138916016, "learning_rate": 2.8298850574712645e-05, "loss": 0.2232, "step": 10955 }, { "epoch": 37.779310344827586, "grad_norm": 0.787896990776062, "learning_rate": 2.82983908045977e-05, "loss": 0.2657, "step": 10956 }, { "epoch": 37.782758620689656, "grad_norm": 0.5181117057800293, "learning_rate": 2.829793103448276e-05, "loss": 0.1795, "step": 10957 }, { "epoch": 37.786206896551725, "grad_norm": 0.8566206693649292, "learning_rate": 2.8297471264367818e-05, "loss": 0.2126, "step": 10958 }, { "epoch": 37.789655172413795, "grad_norm": 0.6774371862411499, "learning_rate": 2.8297011494252873e-05, "loss": 0.2329, "step": 10959 }, { "epoch": 37.793103448275865, "grad_norm": 1.164684534072876, "learning_rate": 2.8296551724137932e-05, "loss": 0.185, "step": 10960 }, { "epoch": 37.796551724137935, "grad_norm": 0.8060206770896912, "learning_rate": 2.8296091954022987e-05, "loss": 0.1791, "step": 10961 }, { "epoch": 37.8, "grad_norm": 0.9427183270454407, "learning_rate": 2.829563218390805e-05, "loss": 0.1908, "step": 10962 }, { "epoch": 37.80344827586207, "grad_norm": 0.532426655292511, "learning_rate": 2.8295172413793104e-05, "loss": 0.1752, "step": 10963 }, { "epoch": 37.80689655172414, "grad_norm": 0.8092300891876221, "learning_rate": 2.829471264367816e-05, "loss": 0.1501, "step": 10964 }, { "epoch": 37.810344827586206, "grad_norm": 0.5543993711471558, "learning_rate": 2.829425287356322e-05, "loss": 0.1642, "step": 10965 }, { "epoch": 37.813793103448276, "grad_norm": 1.4911452531814575, "learning_rate": 2.8293793103448277e-05, "loss": 0.1512, "step": 10966 }, { "epoch": 37.817241379310346, "grad_norm": 2.5904908180236816, "learning_rate": 2.8293333333333336e-05, "loss": 0.1594, "step": 10967 }, { "epoch": 37.820689655172416, "grad_norm": 1.139225721359253, "learning_rate": 2.829287356321839e-05, "loss": 0.1557, "step": 10968 }, { "epoch": 37.824137931034485, "grad_norm": 0.6630194783210754, "learning_rate": 2.8292413793103446e-05, "loss": 0.1632, "step": 10969 }, { "epoch": 37.827586206896555, "grad_norm": 0.8255152106285095, "learning_rate": 2.829195402298851e-05, "loss": 0.1375, "step": 10970 }, { "epoch": 37.83103448275862, "grad_norm": 1.8514176607131958, "learning_rate": 2.8291494252873564e-05, "loss": 0.149, "step": 10971 }, { "epoch": 37.83448275862069, "grad_norm": 0.8214032649993896, "learning_rate": 2.8291034482758622e-05, "loss": 0.1584, "step": 10972 }, { "epoch": 37.83793103448276, "grad_norm": 0.8083770871162415, "learning_rate": 2.8290574712643678e-05, "loss": 0.1447, "step": 10973 }, { "epoch": 37.84137931034483, "grad_norm": 0.8090946078300476, "learning_rate": 2.8290114942528736e-05, "loss": 0.1378, "step": 10974 }, { "epoch": 37.8448275862069, "grad_norm": 0.6819348931312561, "learning_rate": 2.8289655172413795e-05, "loss": 0.1555, "step": 10975 }, { "epoch": 37.84827586206897, "grad_norm": 1.401888370513916, "learning_rate": 2.828919540229885e-05, "loss": 0.1411, "step": 10976 }, { "epoch": 37.851724137931036, "grad_norm": 1.3130310773849487, "learning_rate": 2.828873563218391e-05, "loss": 0.1362, "step": 10977 }, { "epoch": 37.855172413793106, "grad_norm": 1.215781569480896, "learning_rate": 2.8288275862068968e-05, "loss": 0.1466, "step": 10978 }, { "epoch": 37.858620689655176, "grad_norm": 0.7697969079017639, "learning_rate": 2.8287816091954023e-05, "loss": 0.1402, "step": 10979 }, { "epoch": 37.86206896551724, "grad_norm": 1.2649232149124146, "learning_rate": 2.828735632183908e-05, "loss": 0.1973, "step": 10980 }, { "epoch": 37.86551724137931, "grad_norm": 0.5672886371612549, "learning_rate": 2.8286896551724137e-05, "loss": 0.2147, "step": 10981 }, { "epoch": 37.86896551724138, "grad_norm": 1.0458433628082275, "learning_rate": 2.8286436781609196e-05, "loss": 0.1797, "step": 10982 }, { "epoch": 37.87241379310345, "grad_norm": 0.6511807441711426, "learning_rate": 2.8285977011494254e-05, "loss": 0.1834, "step": 10983 }, { "epoch": 37.87586206896552, "grad_norm": 0.6734592318534851, "learning_rate": 2.828551724137931e-05, "loss": 0.1767, "step": 10984 }, { "epoch": 37.87931034482759, "grad_norm": 0.9846782088279724, "learning_rate": 2.828505747126437e-05, "loss": 0.1658, "step": 10985 }, { "epoch": 37.88275862068966, "grad_norm": 0.778020977973938, "learning_rate": 2.8284597701149427e-05, "loss": 0.1884, "step": 10986 }, { "epoch": 37.88620689655173, "grad_norm": 0.7936773896217346, "learning_rate": 2.8284137931034482e-05, "loss": 0.1869, "step": 10987 }, { "epoch": 37.889655172413796, "grad_norm": 7.3912353515625, "learning_rate": 2.828367816091954e-05, "loss": 0.1915, "step": 10988 }, { "epoch": 37.89310344827586, "grad_norm": 1.3535350561141968, "learning_rate": 2.8283218390804596e-05, "loss": 0.168, "step": 10989 }, { "epoch": 37.89655172413793, "grad_norm": 1.112752914428711, "learning_rate": 2.828275862068966e-05, "loss": 0.1591, "step": 10990 }, { "epoch": 37.9, "grad_norm": 0.635279655456543, "learning_rate": 2.8282298850574714e-05, "loss": 0.1767, "step": 10991 }, { "epoch": 37.90344827586207, "grad_norm": 0.7712584137916565, "learning_rate": 2.828183908045977e-05, "loss": 0.1717, "step": 10992 }, { "epoch": 37.90689655172414, "grad_norm": 0.6348098516464233, "learning_rate": 2.8281379310344828e-05, "loss": 0.1516, "step": 10993 }, { "epoch": 37.91034482758621, "grad_norm": 0.5146854519844055, "learning_rate": 2.8280919540229886e-05, "loss": 0.1459, "step": 10994 }, { "epoch": 37.91379310344828, "grad_norm": 1.0111312866210938, "learning_rate": 2.8280459770114945e-05, "loss": 0.1563, "step": 10995 }, { "epoch": 37.91724137931035, "grad_norm": 0.60120689868927, "learning_rate": 2.828e-05, "loss": 0.1755, "step": 10996 }, { "epoch": 37.92068965517242, "grad_norm": 0.8840502500534058, "learning_rate": 2.8279540229885056e-05, "loss": 0.1481, "step": 10997 }, { "epoch": 37.92413793103448, "grad_norm": 0.7667767405509949, "learning_rate": 2.8279080459770118e-05, "loss": 0.1427, "step": 10998 }, { "epoch": 37.92758620689655, "grad_norm": 0.8833067417144775, "learning_rate": 2.8278620689655173e-05, "loss": 0.1355, "step": 10999 }, { "epoch": 37.93103448275862, "grad_norm": 0.7686334848403931, "learning_rate": 2.827816091954023e-05, "loss": 0.1581, "step": 11000 }, { "epoch": 37.93103448275862, "eval_cer": 0.13103430500786103, "eval_loss": 0.3510456383228302, "eval_runtime": 17.7468, "eval_samples_per_second": 52.235, "eval_steps_per_second": 0.169, "eval_wer": 0.2996894409937888, "step": 11000 }, { "epoch": 37.93448275862069, "grad_norm": 2.244279146194458, "learning_rate": 2.8277701149425287e-05, "loss": 0.1306, "step": 11001 }, { "epoch": 37.93793103448276, "grad_norm": 0.8733891844749451, "learning_rate": 2.8277241379310346e-05, "loss": 0.1239, "step": 11002 }, { "epoch": 37.94137931034483, "grad_norm": 1.3579665422439575, "learning_rate": 2.8276781609195404e-05, "loss": 0.1501, "step": 11003 }, { "epoch": 37.9448275862069, "grad_norm": 0.8650179505348206, "learning_rate": 2.827632183908046e-05, "loss": 0.1442, "step": 11004 }, { "epoch": 37.94827586206897, "grad_norm": 1.1297799348831177, "learning_rate": 2.8275862068965518e-05, "loss": 0.2019, "step": 11005 }, { "epoch": 37.95172413793104, "grad_norm": 1.5747121572494507, "learning_rate": 2.8275402298850577e-05, "loss": 0.2113, "step": 11006 }, { "epoch": 37.9551724137931, "grad_norm": 0.7033880949020386, "learning_rate": 2.8274942528735632e-05, "loss": 0.1966, "step": 11007 }, { "epoch": 37.95862068965517, "grad_norm": 0.9622392654418945, "learning_rate": 2.827448275862069e-05, "loss": 0.203, "step": 11008 }, { "epoch": 37.96206896551724, "grad_norm": 0.8751043081283569, "learning_rate": 2.8274022988505746e-05, "loss": 0.1979, "step": 11009 }, { "epoch": 37.96551724137931, "grad_norm": 0.5318447947502136, "learning_rate": 2.8273563218390805e-05, "loss": 0.1627, "step": 11010 }, { "epoch": 37.96896551724138, "grad_norm": 0.8166975975036621, "learning_rate": 2.8273103448275864e-05, "loss": 0.1566, "step": 11011 }, { "epoch": 37.97241379310345, "grad_norm": 0.6261289119720459, "learning_rate": 2.827264367816092e-05, "loss": 0.1598, "step": 11012 }, { "epoch": 37.97586206896552, "grad_norm": 0.5284278392791748, "learning_rate": 2.8272183908045978e-05, "loss": 0.1562, "step": 11013 }, { "epoch": 37.97931034482759, "grad_norm": 0.6055872440338135, "learning_rate": 2.8271724137931036e-05, "loss": 0.1653, "step": 11014 }, { "epoch": 37.98275862068966, "grad_norm": 1.0563055276870728, "learning_rate": 2.827126436781609e-05, "loss": 0.1318, "step": 11015 }, { "epoch": 37.98620689655172, "grad_norm": 0.937923014163971, "learning_rate": 2.827080459770115e-05, "loss": 0.1388, "step": 11016 }, { "epoch": 37.98965517241379, "grad_norm": 0.6607889533042908, "learning_rate": 2.8270344827586206e-05, "loss": 0.1404, "step": 11017 }, { "epoch": 37.99310344827586, "grad_norm": 0.6529019474983215, "learning_rate": 2.8269885057471268e-05, "loss": 0.1232, "step": 11018 }, { "epoch": 37.99655172413793, "grad_norm": 0.7910711765289307, "learning_rate": 2.8269425287356323e-05, "loss": 0.1587, "step": 11019 }, { "epoch": 38.0, "grad_norm": 1.2357171773910522, "learning_rate": 2.8268965517241378e-05, "loss": 0.1834, "step": 11020 }, { "epoch": 38.00344827586207, "grad_norm": 1.288411259651184, "learning_rate": 2.8268505747126437e-05, "loss": 0.2112, "step": 11021 }, { "epoch": 38.00689655172414, "grad_norm": 1.1342320442199707, "learning_rate": 2.8268045977011496e-05, "loss": 0.196, "step": 11022 }, { "epoch": 38.01034482758621, "grad_norm": 0.6177983283996582, "learning_rate": 2.8267586206896554e-05, "loss": 0.1971, "step": 11023 }, { "epoch": 38.01379310344828, "grad_norm": 1.423910140991211, "learning_rate": 2.826712643678161e-05, "loss": 0.1956, "step": 11024 }, { "epoch": 38.01724137931034, "grad_norm": 0.557257354259491, "learning_rate": 2.8266666666666665e-05, "loss": 0.183, "step": 11025 }, { "epoch": 38.02068965517241, "grad_norm": 0.6663224101066589, "learning_rate": 2.8266206896551727e-05, "loss": 0.1608, "step": 11026 }, { "epoch": 38.02413793103448, "grad_norm": 0.7377750873565674, "learning_rate": 2.8265747126436782e-05, "loss": 0.1814, "step": 11027 }, { "epoch": 38.02758620689655, "grad_norm": 1.8387467861175537, "learning_rate": 2.826528735632184e-05, "loss": 0.1764, "step": 11028 }, { "epoch": 38.03103448275862, "grad_norm": 0.6119486093521118, "learning_rate": 2.8264827586206896e-05, "loss": 0.1495, "step": 11029 }, { "epoch": 38.03448275862069, "grad_norm": 0.5738792419433594, "learning_rate": 2.8264367816091955e-05, "loss": 0.169, "step": 11030 }, { "epoch": 38.03793103448276, "grad_norm": 1.0546351671218872, "learning_rate": 2.8263908045977014e-05, "loss": 0.167, "step": 11031 }, { "epoch": 38.04137931034483, "grad_norm": 0.576999843120575, "learning_rate": 2.826344827586207e-05, "loss": 0.1506, "step": 11032 }, { "epoch": 38.0448275862069, "grad_norm": 0.5732587575912476, "learning_rate": 2.8262988505747128e-05, "loss": 0.1443, "step": 11033 }, { "epoch": 38.04827586206896, "grad_norm": 1.4059163331985474, "learning_rate": 2.8262528735632186e-05, "loss": 0.1423, "step": 11034 }, { "epoch": 38.05172413793103, "grad_norm": 1.0474202632904053, "learning_rate": 2.826206896551724e-05, "loss": 0.1283, "step": 11035 }, { "epoch": 38.0551724137931, "grad_norm": 0.6787098050117493, "learning_rate": 2.82616091954023e-05, "loss": 0.1193, "step": 11036 }, { "epoch": 38.05862068965517, "grad_norm": 2.1983323097229004, "learning_rate": 2.8261149425287355e-05, "loss": 0.1413, "step": 11037 }, { "epoch": 38.06206896551724, "grad_norm": 0.5369946360588074, "learning_rate": 2.8260689655172418e-05, "loss": 0.1137, "step": 11038 }, { "epoch": 38.06551724137931, "grad_norm": 0.7499077916145325, "learning_rate": 2.8260229885057473e-05, "loss": 0.1353, "step": 11039 }, { "epoch": 38.06896551724138, "grad_norm": 1.709822177886963, "learning_rate": 2.8259770114942528e-05, "loss": 0.13, "step": 11040 }, { "epoch": 38.07241379310345, "grad_norm": 1.5337316989898682, "learning_rate": 2.8259310344827587e-05, "loss": 0.1139, "step": 11041 }, { "epoch": 38.07586206896552, "grad_norm": 0.6773917078971863, "learning_rate": 2.8258850574712645e-05, "loss": 0.1149, "step": 11042 }, { "epoch": 38.07931034482758, "grad_norm": 0.7857959270477295, "learning_rate": 2.82583908045977e-05, "loss": 0.1131, "step": 11043 }, { "epoch": 38.08275862068965, "grad_norm": 1.449794054031372, "learning_rate": 2.825793103448276e-05, "loss": 0.133, "step": 11044 }, { "epoch": 38.08620689655172, "grad_norm": 1.6826680898666382, "learning_rate": 2.8257471264367815e-05, "loss": 0.1824, "step": 11045 }, { "epoch": 38.08965517241379, "grad_norm": 1.0662840604782104, "learning_rate": 2.8257011494252877e-05, "loss": 0.223, "step": 11046 }, { "epoch": 38.09310344827586, "grad_norm": 0.9584540724754333, "learning_rate": 2.8256551724137932e-05, "loss": 0.1891, "step": 11047 }, { "epoch": 38.09655172413793, "grad_norm": 0.53106689453125, "learning_rate": 2.8256091954022987e-05, "loss": 0.1755, "step": 11048 }, { "epoch": 38.1, "grad_norm": 0.59113609790802, "learning_rate": 2.8255632183908046e-05, "loss": 0.1854, "step": 11049 }, { "epoch": 38.10344827586207, "grad_norm": 1.7208354473114014, "learning_rate": 2.8255172413793105e-05, "loss": 0.1701, "step": 11050 }, { "epoch": 38.10689655172414, "grad_norm": 0.7906379103660583, "learning_rate": 2.8254712643678163e-05, "loss": 0.1484, "step": 11051 }, { "epoch": 38.110344827586204, "grad_norm": 0.6353622674942017, "learning_rate": 2.825425287356322e-05, "loss": 0.1662, "step": 11052 }, { "epoch": 38.11379310344827, "grad_norm": 0.9228904247283936, "learning_rate": 2.8253793103448274e-05, "loss": 0.172, "step": 11053 }, { "epoch": 38.11724137931034, "grad_norm": 1.6312378644943237, "learning_rate": 2.8253333333333336e-05, "loss": 0.1681, "step": 11054 }, { "epoch": 38.12068965517241, "grad_norm": 0.7363501787185669, "learning_rate": 2.825287356321839e-05, "loss": 0.15, "step": 11055 }, { "epoch": 38.12413793103448, "grad_norm": 1.122572660446167, "learning_rate": 2.825241379310345e-05, "loss": 0.1682, "step": 11056 }, { "epoch": 38.12758620689655, "grad_norm": 2.118699073791504, "learning_rate": 2.8251954022988505e-05, "loss": 0.1745, "step": 11057 }, { "epoch": 38.13103448275862, "grad_norm": 0.5879372358322144, "learning_rate": 2.8251494252873564e-05, "loss": 0.1575, "step": 11058 }, { "epoch": 38.13448275862069, "grad_norm": 0.949296236038208, "learning_rate": 2.8251034482758623e-05, "loss": 0.1591, "step": 11059 }, { "epoch": 38.13793103448276, "grad_norm": 0.6345550417900085, "learning_rate": 2.8250574712643678e-05, "loss": 0.1834, "step": 11060 }, { "epoch": 38.141379310344824, "grad_norm": 0.9381221532821655, "learning_rate": 2.8250114942528737e-05, "loss": 0.1406, "step": 11061 }, { "epoch": 38.144827586206894, "grad_norm": 0.6490294337272644, "learning_rate": 2.8249655172413795e-05, "loss": 0.1411, "step": 11062 }, { "epoch": 38.148275862068964, "grad_norm": 0.621123194694519, "learning_rate": 2.824919540229885e-05, "loss": 0.1378, "step": 11063 }, { "epoch": 38.15172413793103, "grad_norm": 0.6010726094245911, "learning_rate": 2.824873563218391e-05, "loss": 0.1251, "step": 11064 }, { "epoch": 38.1551724137931, "grad_norm": 0.844870924949646, "learning_rate": 2.8248275862068965e-05, "loss": 0.1337, "step": 11065 }, { "epoch": 38.15862068965517, "grad_norm": 0.9412791728973389, "learning_rate": 2.8247816091954027e-05, "loss": 0.1257, "step": 11066 }, { "epoch": 38.16206896551724, "grad_norm": 3.892554521560669, "learning_rate": 2.8247356321839082e-05, "loss": 0.1335, "step": 11067 }, { "epoch": 38.16551724137931, "grad_norm": 1.4252945184707642, "learning_rate": 2.8246896551724137e-05, "loss": 0.1256, "step": 11068 }, { "epoch": 38.16896551724138, "grad_norm": 0.8877184391021729, "learning_rate": 2.8246436781609196e-05, "loss": 0.1303, "step": 11069 }, { "epoch": 38.172413793103445, "grad_norm": 2.387632369995117, "learning_rate": 2.8245977011494255e-05, "loss": 0.1634, "step": 11070 }, { "epoch": 38.175862068965515, "grad_norm": 0.6913253664970398, "learning_rate": 2.824551724137931e-05, "loss": 0.2395, "step": 11071 }, { "epoch": 38.179310344827584, "grad_norm": 0.519919753074646, "learning_rate": 2.824505747126437e-05, "loss": 0.2057, "step": 11072 }, { "epoch": 38.182758620689654, "grad_norm": 0.8646356463432312, "learning_rate": 2.8244597701149424e-05, "loss": 0.1801, "step": 11073 }, { "epoch": 38.186206896551724, "grad_norm": 0.6832991242408752, "learning_rate": 2.8244137931034486e-05, "loss": 0.173, "step": 11074 }, { "epoch": 38.189655172413794, "grad_norm": 0.791799783706665, "learning_rate": 2.824367816091954e-05, "loss": 0.1578, "step": 11075 }, { "epoch": 38.19310344827586, "grad_norm": 0.8161206841468811, "learning_rate": 2.8243218390804597e-05, "loss": 0.1594, "step": 11076 }, { "epoch": 38.19655172413793, "grad_norm": 0.5462909936904907, "learning_rate": 2.8242758620689655e-05, "loss": 0.1881, "step": 11077 }, { "epoch": 38.2, "grad_norm": 0.623665988445282, "learning_rate": 2.8242298850574714e-05, "loss": 0.174, "step": 11078 }, { "epoch": 38.203448275862065, "grad_norm": 0.6887049078941345, "learning_rate": 2.8241839080459773e-05, "loss": 0.166, "step": 11079 }, { "epoch": 38.206896551724135, "grad_norm": 0.7617496848106384, "learning_rate": 2.8241379310344828e-05, "loss": 0.1421, "step": 11080 }, { "epoch": 38.210344827586205, "grad_norm": 1.2492121458053589, "learning_rate": 2.8240919540229883e-05, "loss": 0.1299, "step": 11081 }, { "epoch": 38.213793103448275, "grad_norm": 0.5571796298027039, "learning_rate": 2.8240459770114945e-05, "loss": 0.1636, "step": 11082 }, { "epoch": 38.217241379310344, "grad_norm": 0.548718273639679, "learning_rate": 2.824e-05, "loss": 0.1412, "step": 11083 }, { "epoch": 38.220689655172414, "grad_norm": 0.5747441053390503, "learning_rate": 2.823954022988506e-05, "loss": 0.1289, "step": 11084 }, { "epoch": 38.224137931034484, "grad_norm": 1.056795358657837, "learning_rate": 2.8239080459770115e-05, "loss": 0.1525, "step": 11085 }, { "epoch": 38.227586206896554, "grad_norm": 0.6217753887176514, "learning_rate": 2.8238620689655173e-05, "loss": 0.1095, "step": 11086 }, { "epoch": 38.23103448275862, "grad_norm": 0.5540239214897156, "learning_rate": 2.8238160919540232e-05, "loss": 0.1128, "step": 11087 }, { "epoch": 38.234482758620686, "grad_norm": 0.7455006837844849, "learning_rate": 2.8237701149425287e-05, "loss": 0.1238, "step": 11088 }, { "epoch": 38.237931034482756, "grad_norm": 0.701106071472168, "learning_rate": 2.8237241379310346e-05, "loss": 0.141, "step": 11089 }, { "epoch": 38.241379310344826, "grad_norm": 0.6624748706817627, "learning_rate": 2.8236781609195405e-05, "loss": 0.1147, "step": 11090 }, { "epoch": 38.244827586206895, "grad_norm": 1.6331150531768799, "learning_rate": 2.823632183908046e-05, "loss": 0.1235, "step": 11091 }, { "epoch": 38.248275862068965, "grad_norm": 0.73601895570755, "learning_rate": 2.823586206896552e-05, "loss": 0.133, "step": 11092 }, { "epoch": 38.251724137931035, "grad_norm": 0.6321110725402832, "learning_rate": 2.8235402298850574e-05, "loss": 0.1053, "step": 11093 }, { "epoch": 38.255172413793105, "grad_norm": 0.8897535800933838, "learning_rate": 2.8234942528735636e-05, "loss": 0.1272, "step": 11094 }, { "epoch": 38.258620689655174, "grad_norm": 1.3054646253585815, "learning_rate": 2.823448275862069e-05, "loss": 0.1575, "step": 11095 }, { "epoch": 38.262068965517244, "grad_norm": 0.8358314037322998, "learning_rate": 2.8234022988505747e-05, "loss": 0.2264, "step": 11096 }, { "epoch": 38.265517241379314, "grad_norm": 0.785050630569458, "learning_rate": 2.8233563218390805e-05, "loss": 0.1888, "step": 11097 }, { "epoch": 38.26896551724138, "grad_norm": 0.5590908527374268, "learning_rate": 2.8233103448275864e-05, "loss": 0.1773, "step": 11098 }, { "epoch": 38.272413793103446, "grad_norm": 0.7016497850418091, "learning_rate": 2.823264367816092e-05, "loss": 0.1844, "step": 11099 }, { "epoch": 38.275862068965516, "grad_norm": 0.698447048664093, "learning_rate": 2.8232183908045978e-05, "loss": 0.174, "step": 11100 }, { "epoch": 38.279310344827586, "grad_norm": 0.6120927929878235, "learning_rate": 2.8231724137931033e-05, "loss": 0.1828, "step": 11101 }, { "epoch": 38.282758620689656, "grad_norm": 0.5784707069396973, "learning_rate": 2.8231264367816095e-05, "loss": 0.1687, "step": 11102 }, { "epoch": 38.286206896551725, "grad_norm": 0.9990240931510925, "learning_rate": 2.823080459770115e-05, "loss": 0.1852, "step": 11103 }, { "epoch": 38.289655172413795, "grad_norm": 0.6674383878707886, "learning_rate": 2.8230344827586206e-05, "loss": 0.1368, "step": 11104 }, { "epoch": 38.293103448275865, "grad_norm": 0.5530248880386353, "learning_rate": 2.8229885057471265e-05, "loss": 0.1575, "step": 11105 }, { "epoch": 38.296551724137935, "grad_norm": 0.9319071769714355, "learning_rate": 2.8229425287356323e-05, "loss": 0.1595, "step": 11106 }, { "epoch": 38.3, "grad_norm": 1.5229263305664062, "learning_rate": 2.8228965517241382e-05, "loss": 0.1718, "step": 11107 }, { "epoch": 38.30344827586207, "grad_norm": 0.8065203428268433, "learning_rate": 2.8228505747126437e-05, "loss": 0.1456, "step": 11108 }, { "epoch": 38.30689655172414, "grad_norm": 0.522131621837616, "learning_rate": 2.8228045977011493e-05, "loss": 0.1281, "step": 11109 }, { "epoch": 38.310344827586206, "grad_norm": 2.7977569103240967, "learning_rate": 2.8227586206896555e-05, "loss": 0.1345, "step": 11110 }, { "epoch": 38.313793103448276, "grad_norm": 0.5845361351966858, "learning_rate": 2.822712643678161e-05, "loss": 0.1277, "step": 11111 }, { "epoch": 38.317241379310346, "grad_norm": 1.2738466262817383, "learning_rate": 2.822666666666667e-05, "loss": 0.1467, "step": 11112 }, { "epoch": 38.320689655172416, "grad_norm": 0.6461541056632996, "learning_rate": 2.8226206896551724e-05, "loss": 0.1256, "step": 11113 }, { "epoch": 38.324137931034485, "grad_norm": 1.1052881479263306, "learning_rate": 2.8225747126436783e-05, "loss": 0.1244, "step": 11114 }, { "epoch": 38.327586206896555, "grad_norm": 1.7344257831573486, "learning_rate": 2.822528735632184e-05, "loss": 0.1138, "step": 11115 }, { "epoch": 38.33103448275862, "grad_norm": 1.1886646747589111, "learning_rate": 2.8224827586206897e-05, "loss": 0.1311, "step": 11116 }, { "epoch": 38.33448275862069, "grad_norm": 1.0006709098815918, "learning_rate": 2.8224367816091955e-05, "loss": 0.1282, "step": 11117 }, { "epoch": 38.33793103448276, "grad_norm": 1.5593358278274536, "learning_rate": 2.8223908045977014e-05, "loss": 0.1271, "step": 11118 }, { "epoch": 38.34137931034483, "grad_norm": 5.1732940673828125, "learning_rate": 2.822344827586207e-05, "loss": 0.1236, "step": 11119 }, { "epoch": 38.3448275862069, "grad_norm": 3.5342190265655518, "learning_rate": 2.8222988505747128e-05, "loss": 0.1547, "step": 11120 }, { "epoch": 38.34827586206897, "grad_norm": 0.6172340512275696, "learning_rate": 2.8222528735632183e-05, "loss": 0.2441, "step": 11121 }, { "epoch": 38.351724137931036, "grad_norm": 1.1009495258331299, "learning_rate": 2.8222068965517245e-05, "loss": 0.1905, "step": 11122 }, { "epoch": 38.355172413793106, "grad_norm": 0.8726471662521362, "learning_rate": 2.82216091954023e-05, "loss": 0.1886, "step": 11123 }, { "epoch": 38.358620689655176, "grad_norm": 0.8554416298866272, "learning_rate": 2.8221149425287356e-05, "loss": 0.2054, "step": 11124 }, { "epoch": 38.36206896551724, "grad_norm": 1.0626298189163208, "learning_rate": 2.8220689655172415e-05, "loss": 0.1911, "step": 11125 }, { "epoch": 38.36551724137931, "grad_norm": 0.6079199910163879, "learning_rate": 2.8220229885057473e-05, "loss": 0.1654, "step": 11126 }, { "epoch": 38.36896551724138, "grad_norm": 0.6212504506111145, "learning_rate": 2.8219770114942532e-05, "loss": 0.1557, "step": 11127 }, { "epoch": 38.37241379310345, "grad_norm": 0.496440052986145, "learning_rate": 2.8219310344827587e-05, "loss": 0.1584, "step": 11128 }, { "epoch": 38.37586206896552, "grad_norm": 0.5732938647270203, "learning_rate": 2.8218850574712642e-05, "loss": 0.1365, "step": 11129 }, { "epoch": 38.37931034482759, "grad_norm": 1.3416622877120972, "learning_rate": 2.8218390804597705e-05, "loss": 0.1582, "step": 11130 }, { "epoch": 38.38275862068966, "grad_norm": 1.3807644844055176, "learning_rate": 2.821793103448276e-05, "loss": 0.1625, "step": 11131 }, { "epoch": 38.38620689655173, "grad_norm": 0.9106972813606262, "learning_rate": 2.8217471264367815e-05, "loss": 0.147, "step": 11132 }, { "epoch": 38.389655172413796, "grad_norm": 1.0389920473098755, "learning_rate": 2.8217011494252874e-05, "loss": 0.1461, "step": 11133 }, { "epoch": 38.39310344827586, "grad_norm": 0.7202304005622864, "learning_rate": 2.8216551724137933e-05, "loss": 0.144, "step": 11134 }, { "epoch": 38.39655172413793, "grad_norm": 0.555806577205658, "learning_rate": 2.821609195402299e-05, "loss": 0.1248, "step": 11135 }, { "epoch": 38.4, "grad_norm": 0.7730570435523987, "learning_rate": 2.8215632183908046e-05, "loss": 0.1607, "step": 11136 }, { "epoch": 38.40344827586207, "grad_norm": 0.7074033617973328, "learning_rate": 2.8215172413793102e-05, "loss": 0.127, "step": 11137 }, { "epoch": 38.40689655172414, "grad_norm": 0.6953191161155701, "learning_rate": 2.8214712643678164e-05, "loss": 0.1298, "step": 11138 }, { "epoch": 38.41034482758621, "grad_norm": 0.9239925742149353, "learning_rate": 2.821425287356322e-05, "loss": 0.1452, "step": 11139 }, { "epoch": 38.41379310344828, "grad_norm": 1.2424966096878052, "learning_rate": 2.8213793103448278e-05, "loss": 0.1271, "step": 11140 }, { "epoch": 38.41724137931035, "grad_norm": 2.8544068336486816, "learning_rate": 2.8213333333333333e-05, "loss": 0.1272, "step": 11141 }, { "epoch": 38.42068965517242, "grad_norm": 0.7662988305091858, "learning_rate": 2.8212873563218392e-05, "loss": 0.1282, "step": 11142 }, { "epoch": 38.42413793103448, "grad_norm": 0.9247673153877258, "learning_rate": 2.821241379310345e-05, "loss": 0.1239, "step": 11143 }, { "epoch": 38.42758620689655, "grad_norm": 0.7490770816802979, "learning_rate": 2.8211954022988506e-05, "loss": 0.1217, "step": 11144 }, { "epoch": 38.43103448275862, "grad_norm": 1.0376331806182861, "learning_rate": 2.8211494252873564e-05, "loss": 0.2088, "step": 11145 }, { "epoch": 38.43448275862069, "grad_norm": 1.3446253538131714, "learning_rate": 2.8211034482758623e-05, "loss": 0.2585, "step": 11146 }, { "epoch": 38.43793103448276, "grad_norm": 0.7463351488113403, "learning_rate": 2.821057471264368e-05, "loss": 0.1908, "step": 11147 }, { "epoch": 38.44137931034483, "grad_norm": 0.9018734693527222, "learning_rate": 2.8210114942528737e-05, "loss": 0.2099, "step": 11148 }, { "epoch": 38.4448275862069, "grad_norm": 0.5361537337303162, "learning_rate": 2.8209655172413792e-05, "loss": 0.1638, "step": 11149 }, { "epoch": 38.44827586206897, "grad_norm": 0.693289577960968, "learning_rate": 2.820919540229885e-05, "loss": 0.1834, "step": 11150 }, { "epoch": 38.45172413793104, "grad_norm": 0.7946658730506897, "learning_rate": 2.820873563218391e-05, "loss": 0.1651, "step": 11151 }, { "epoch": 38.4551724137931, "grad_norm": 0.5924472808837891, "learning_rate": 2.8208275862068965e-05, "loss": 0.1621, "step": 11152 }, { "epoch": 38.45862068965517, "grad_norm": 0.5958293676376343, "learning_rate": 2.8207816091954024e-05, "loss": 0.1666, "step": 11153 }, { "epoch": 38.46206896551724, "grad_norm": 0.5247872471809387, "learning_rate": 2.820735632183908e-05, "loss": 0.157, "step": 11154 }, { "epoch": 38.46551724137931, "grad_norm": 0.541880190372467, "learning_rate": 2.820689655172414e-05, "loss": 0.1612, "step": 11155 }, { "epoch": 38.46896551724138, "grad_norm": 0.5855715870857239, "learning_rate": 2.8206436781609196e-05, "loss": 0.1473, "step": 11156 }, { "epoch": 38.47241379310345, "grad_norm": 2.371551513671875, "learning_rate": 2.8205977011494252e-05, "loss": 0.1635, "step": 11157 }, { "epoch": 38.47586206896552, "grad_norm": 0.6856994032859802, "learning_rate": 2.820551724137931e-05, "loss": 0.1512, "step": 11158 }, { "epoch": 38.47931034482759, "grad_norm": 0.8492767810821533, "learning_rate": 2.820505747126437e-05, "loss": 0.148, "step": 11159 }, { "epoch": 38.48275862068966, "grad_norm": 1.0750398635864258, "learning_rate": 2.8204597701149424e-05, "loss": 0.1501, "step": 11160 }, { "epoch": 38.48620689655172, "grad_norm": 0.5936921238899231, "learning_rate": 2.8204137931034483e-05, "loss": 0.1534, "step": 11161 }, { "epoch": 38.48965517241379, "grad_norm": 0.817501425743103, "learning_rate": 2.820367816091954e-05, "loss": 0.1196, "step": 11162 }, { "epoch": 38.49310344827586, "grad_norm": 0.9979875087738037, "learning_rate": 2.82032183908046e-05, "loss": 0.1327, "step": 11163 }, { "epoch": 38.49655172413793, "grad_norm": 0.7728829979896545, "learning_rate": 2.8202758620689656e-05, "loss": 0.1424, "step": 11164 }, { "epoch": 38.5, "grad_norm": 0.8059385418891907, "learning_rate": 2.820229885057471e-05, "loss": 0.1282, "step": 11165 }, { "epoch": 38.50344827586207, "grad_norm": 0.7044061422348022, "learning_rate": 2.820183908045977e-05, "loss": 0.1546, "step": 11166 }, { "epoch": 38.50689655172414, "grad_norm": 0.6805763840675354, "learning_rate": 2.820137931034483e-05, "loss": 0.1217, "step": 11167 }, { "epoch": 38.51034482758621, "grad_norm": 1.2128427028656006, "learning_rate": 2.8200919540229887e-05, "loss": 0.1011, "step": 11168 }, { "epoch": 38.51379310344828, "grad_norm": 0.8982587456703186, "learning_rate": 2.8200459770114942e-05, "loss": 0.1269, "step": 11169 }, { "epoch": 38.51724137931034, "grad_norm": 1.0519921779632568, "learning_rate": 2.8199999999999998e-05, "loss": 0.1741, "step": 11170 }, { "epoch": 38.52068965517241, "grad_norm": 0.712051510810852, "learning_rate": 2.819954022988506e-05, "loss": 0.2228, "step": 11171 }, { "epoch": 38.52413793103448, "grad_norm": 1.0443302392959595, "learning_rate": 2.8199080459770115e-05, "loss": 0.1965, "step": 11172 }, { "epoch": 38.52758620689655, "grad_norm": 0.8447859883308411, "learning_rate": 2.8198620689655174e-05, "loss": 0.1917, "step": 11173 }, { "epoch": 38.53103448275862, "grad_norm": 1.6274302005767822, "learning_rate": 2.819816091954023e-05, "loss": 0.1663, "step": 11174 }, { "epoch": 38.53448275862069, "grad_norm": 0.6539332866668701, "learning_rate": 2.8197701149425288e-05, "loss": 0.184, "step": 11175 }, { "epoch": 38.53793103448276, "grad_norm": 2.3592281341552734, "learning_rate": 2.8197241379310346e-05, "loss": 0.1568, "step": 11176 }, { "epoch": 38.54137931034483, "grad_norm": 0.778899610042572, "learning_rate": 2.81967816091954e-05, "loss": 0.1901, "step": 11177 }, { "epoch": 38.5448275862069, "grad_norm": 0.66343092918396, "learning_rate": 2.819632183908046e-05, "loss": 0.1642, "step": 11178 }, { "epoch": 38.54827586206896, "grad_norm": 1.4064241647720337, "learning_rate": 2.819586206896552e-05, "loss": 0.1544, "step": 11179 }, { "epoch": 38.55172413793103, "grad_norm": 0.7369663715362549, "learning_rate": 2.8195402298850574e-05, "loss": 0.138, "step": 11180 }, { "epoch": 38.5551724137931, "grad_norm": 0.7397128343582153, "learning_rate": 2.8194942528735633e-05, "loss": 0.1493, "step": 11181 }, { "epoch": 38.55862068965517, "grad_norm": 0.9013453722000122, "learning_rate": 2.819448275862069e-05, "loss": 0.1754, "step": 11182 }, { "epoch": 38.56206896551724, "grad_norm": 0.7493063807487488, "learning_rate": 2.819402298850575e-05, "loss": 0.1474, "step": 11183 }, { "epoch": 38.56551724137931, "grad_norm": 0.9977419972419739, "learning_rate": 2.8193563218390806e-05, "loss": 0.1724, "step": 11184 }, { "epoch": 38.56896551724138, "grad_norm": 1.036911964416504, "learning_rate": 2.819310344827586e-05, "loss": 0.1644, "step": 11185 }, { "epoch": 38.57241379310345, "grad_norm": 1.0633352994918823, "learning_rate": 2.819264367816092e-05, "loss": 0.1588, "step": 11186 }, { "epoch": 38.57586206896552, "grad_norm": 1.0141501426696777, "learning_rate": 2.819218390804598e-05, "loss": 0.1336, "step": 11187 }, { "epoch": 38.57931034482758, "grad_norm": 0.8761879205703735, "learning_rate": 2.8191724137931034e-05, "loss": 0.1414, "step": 11188 }, { "epoch": 38.58275862068965, "grad_norm": 0.6292900443077087, "learning_rate": 2.8191264367816092e-05, "loss": 0.1258, "step": 11189 }, { "epoch": 38.58620689655172, "grad_norm": 1.840221881866455, "learning_rate": 2.8190804597701148e-05, "loss": 0.1381, "step": 11190 }, { "epoch": 38.58965517241379, "grad_norm": 0.7856280207633972, "learning_rate": 2.819034482758621e-05, "loss": 0.1404, "step": 11191 }, { "epoch": 38.59310344827586, "grad_norm": 0.9368109107017517, "learning_rate": 2.8189885057471265e-05, "loss": 0.1211, "step": 11192 }, { "epoch": 38.59655172413793, "grad_norm": 0.7232136130332947, "learning_rate": 2.818942528735632e-05, "loss": 0.1263, "step": 11193 }, { "epoch": 38.6, "grad_norm": 0.7170366048812866, "learning_rate": 2.818896551724138e-05, "loss": 0.1318, "step": 11194 }, { "epoch": 38.60344827586207, "grad_norm": 1.0634135007858276, "learning_rate": 2.8188505747126438e-05, "loss": 0.1605, "step": 11195 }, { "epoch": 38.60689655172414, "grad_norm": 0.5701091289520264, "learning_rate": 2.8188045977011496e-05, "loss": 0.235, "step": 11196 }, { "epoch": 38.610344827586204, "grad_norm": 0.7595037221908569, "learning_rate": 2.818758620689655e-05, "loss": 0.2031, "step": 11197 }, { "epoch": 38.61379310344827, "grad_norm": 0.7609796524047852, "learning_rate": 2.8187126436781607e-05, "loss": 0.1746, "step": 11198 }, { "epoch": 38.61724137931034, "grad_norm": 0.741205632686615, "learning_rate": 2.818666666666667e-05, "loss": 0.1755, "step": 11199 }, { "epoch": 38.62068965517241, "grad_norm": 0.5766764283180237, "learning_rate": 2.8186206896551724e-05, "loss": 0.169, "step": 11200 }, { "epoch": 38.62413793103448, "grad_norm": 0.8556082844734192, "learning_rate": 2.8185747126436783e-05, "loss": 0.1857, "step": 11201 }, { "epoch": 38.62758620689655, "grad_norm": 0.8908397555351257, "learning_rate": 2.8185287356321838e-05, "loss": 0.1601, "step": 11202 }, { "epoch": 38.63103448275862, "grad_norm": 1.4185616970062256, "learning_rate": 2.8184827586206897e-05, "loss": 0.1557, "step": 11203 }, { "epoch": 38.63448275862069, "grad_norm": 0.6099755167961121, "learning_rate": 2.8184367816091956e-05, "loss": 0.1468, "step": 11204 }, { "epoch": 38.63793103448276, "grad_norm": 0.7966406345367432, "learning_rate": 2.818390804597701e-05, "loss": 0.1514, "step": 11205 }, { "epoch": 38.641379310344824, "grad_norm": 1.1357074975967407, "learning_rate": 2.818344827586207e-05, "loss": 0.1456, "step": 11206 }, { "epoch": 38.644827586206894, "grad_norm": 1.0193994045257568, "learning_rate": 2.8182988505747128e-05, "loss": 0.16, "step": 11207 }, { "epoch": 38.648275862068964, "grad_norm": 1.0271649360656738, "learning_rate": 2.8182528735632184e-05, "loss": 0.1416, "step": 11208 }, { "epoch": 38.65172413793103, "grad_norm": 0.6540797352790833, "learning_rate": 2.8182068965517242e-05, "loss": 0.1504, "step": 11209 }, { "epoch": 38.6551724137931, "grad_norm": 0.669283926486969, "learning_rate": 2.8181609195402298e-05, "loss": 0.1587, "step": 11210 }, { "epoch": 38.65862068965517, "grad_norm": 3.3099746704101562, "learning_rate": 2.818114942528736e-05, "loss": 0.138, "step": 11211 }, { "epoch": 38.66206896551724, "grad_norm": 0.7048425078392029, "learning_rate": 2.8180689655172415e-05, "loss": 0.1377, "step": 11212 }, { "epoch": 38.66551724137931, "grad_norm": 3.8509955406188965, "learning_rate": 2.818022988505747e-05, "loss": 0.1572, "step": 11213 }, { "epoch": 38.66896551724138, "grad_norm": 0.6190562844276428, "learning_rate": 2.817977011494253e-05, "loss": 0.117, "step": 11214 }, { "epoch": 38.672413793103445, "grad_norm": 0.7845790386199951, "learning_rate": 2.8179310344827588e-05, "loss": 0.1339, "step": 11215 }, { "epoch": 38.675862068965515, "grad_norm": 1.3034923076629639, "learning_rate": 2.8178850574712646e-05, "loss": 0.1236, "step": 11216 }, { "epoch": 38.679310344827584, "grad_norm": 0.6990228295326233, "learning_rate": 2.81783908045977e-05, "loss": 0.1069, "step": 11217 }, { "epoch": 38.682758620689654, "grad_norm": 0.9575676321983337, "learning_rate": 2.8177931034482757e-05, "loss": 0.1258, "step": 11218 }, { "epoch": 38.686206896551724, "grad_norm": 0.7768901586532593, "learning_rate": 2.817747126436782e-05, "loss": 0.1392, "step": 11219 }, { "epoch": 38.689655172413794, "grad_norm": 1.1386855840682983, "learning_rate": 2.8177011494252874e-05, "loss": 0.1685, "step": 11220 }, { "epoch": 38.69310344827586, "grad_norm": 0.674602746963501, "learning_rate": 2.817655172413793e-05, "loss": 0.2232, "step": 11221 }, { "epoch": 38.69655172413793, "grad_norm": 0.7140346765518188, "learning_rate": 2.8176091954022988e-05, "loss": 0.2039, "step": 11222 }, { "epoch": 38.7, "grad_norm": 0.6936749219894409, "learning_rate": 2.8175632183908047e-05, "loss": 0.1843, "step": 11223 }, { "epoch": 38.703448275862065, "grad_norm": 0.7373558282852173, "learning_rate": 2.8175172413793106e-05, "loss": 0.1581, "step": 11224 }, { "epoch": 38.706896551724135, "grad_norm": 1.226414442062378, "learning_rate": 2.817471264367816e-05, "loss": 0.1753, "step": 11225 }, { "epoch": 38.710344827586205, "grad_norm": 1.0694701671600342, "learning_rate": 2.8174252873563216e-05, "loss": 0.1643, "step": 11226 }, { "epoch": 38.713793103448275, "grad_norm": 0.8881990909576416, "learning_rate": 2.8173793103448278e-05, "loss": 0.1766, "step": 11227 }, { "epoch": 38.717241379310344, "grad_norm": 0.8632447719573975, "learning_rate": 2.8173333333333334e-05, "loss": 0.1886, "step": 11228 }, { "epoch": 38.720689655172414, "grad_norm": 0.9795783758163452, "learning_rate": 2.8172873563218392e-05, "loss": 0.1837, "step": 11229 }, { "epoch": 38.724137931034484, "grad_norm": 1.17487633228302, "learning_rate": 2.8172413793103447e-05, "loss": 0.145, "step": 11230 }, { "epoch": 38.727586206896554, "grad_norm": 0.6265134811401367, "learning_rate": 2.8171954022988506e-05, "loss": 0.1517, "step": 11231 }, { "epoch": 38.73103448275862, "grad_norm": 1.9006118774414062, "learning_rate": 2.8171494252873565e-05, "loss": 0.1642, "step": 11232 }, { "epoch": 38.734482758620686, "grad_norm": 0.6617832779884338, "learning_rate": 2.817103448275862e-05, "loss": 0.1569, "step": 11233 }, { "epoch": 38.737931034482756, "grad_norm": 0.9698827862739563, "learning_rate": 2.817057471264368e-05, "loss": 0.1408, "step": 11234 }, { "epoch": 38.741379310344826, "grad_norm": 0.6907277703285217, "learning_rate": 2.8170114942528738e-05, "loss": 0.1459, "step": 11235 }, { "epoch": 38.744827586206895, "grad_norm": 0.6659308075904846, "learning_rate": 2.8169655172413793e-05, "loss": 0.1455, "step": 11236 }, { "epoch": 38.748275862068965, "grad_norm": 0.9962143898010254, "learning_rate": 2.816919540229885e-05, "loss": 0.1424, "step": 11237 }, { "epoch": 38.751724137931035, "grad_norm": 0.5742040872573853, "learning_rate": 2.8168735632183907e-05, "loss": 0.1398, "step": 11238 }, { "epoch": 38.755172413793105, "grad_norm": 0.9889978170394897, "learning_rate": 2.816827586206897e-05, "loss": 0.1347, "step": 11239 }, { "epoch": 38.758620689655174, "grad_norm": 0.7384815812110901, "learning_rate": 2.8167816091954024e-05, "loss": 0.1177, "step": 11240 }, { "epoch": 38.762068965517244, "grad_norm": 0.8404116034507751, "learning_rate": 2.816735632183908e-05, "loss": 0.129, "step": 11241 }, { "epoch": 38.765517241379314, "grad_norm": 0.6753665804862976, "learning_rate": 2.8166896551724138e-05, "loss": 0.1138, "step": 11242 }, { "epoch": 38.76896551724138, "grad_norm": 0.9615057706832886, "learning_rate": 2.8166436781609197e-05, "loss": 0.1261, "step": 11243 }, { "epoch": 38.772413793103446, "grad_norm": 1.154229998588562, "learning_rate": 2.8165977011494255e-05, "loss": 0.1195, "step": 11244 }, { "epoch": 38.775862068965516, "grad_norm": 1.661726474761963, "learning_rate": 2.816551724137931e-05, "loss": 0.1417, "step": 11245 }, { "epoch": 38.779310344827586, "grad_norm": 2.558492660522461, "learning_rate": 2.8165057471264366e-05, "loss": 0.2247, "step": 11246 }, { "epoch": 38.782758620689656, "grad_norm": 0.5422631502151489, "learning_rate": 2.8164597701149428e-05, "loss": 0.1866, "step": 11247 }, { "epoch": 38.786206896551725, "grad_norm": 0.5166971683502197, "learning_rate": 2.8164137931034483e-05, "loss": 0.201, "step": 11248 }, { "epoch": 38.789655172413795, "grad_norm": 0.8265708088874817, "learning_rate": 2.816367816091954e-05, "loss": 0.1776, "step": 11249 }, { "epoch": 38.793103448275865, "grad_norm": 1.0693845748901367, "learning_rate": 2.8163218390804597e-05, "loss": 0.1796, "step": 11250 }, { "epoch": 38.796551724137935, "grad_norm": 0.6405310034751892, "learning_rate": 2.8162758620689656e-05, "loss": 0.1773, "step": 11251 }, { "epoch": 38.8, "grad_norm": 0.7177334427833557, "learning_rate": 2.8162298850574715e-05, "loss": 0.1751, "step": 11252 }, { "epoch": 38.80344827586207, "grad_norm": 0.7087022662162781, "learning_rate": 2.816183908045977e-05, "loss": 0.1711, "step": 11253 }, { "epoch": 38.80689655172414, "grad_norm": 0.6847899556159973, "learning_rate": 2.8161379310344825e-05, "loss": 0.1366, "step": 11254 }, { "epoch": 38.810344827586206, "grad_norm": 0.6740031838417053, "learning_rate": 2.8160919540229887e-05, "loss": 0.1679, "step": 11255 }, { "epoch": 38.813793103448276, "grad_norm": 0.8846389651298523, "learning_rate": 2.8160459770114943e-05, "loss": 0.1506, "step": 11256 }, { "epoch": 38.817241379310346, "grad_norm": 1.050904393196106, "learning_rate": 2.816e-05, "loss": 0.1412, "step": 11257 }, { "epoch": 38.820689655172416, "grad_norm": 0.5958207845687866, "learning_rate": 2.8159540229885057e-05, "loss": 0.146, "step": 11258 }, { "epoch": 38.824137931034485, "grad_norm": 1.9387481212615967, "learning_rate": 2.8159080459770115e-05, "loss": 0.1524, "step": 11259 }, { "epoch": 38.827586206896555, "grad_norm": 0.675316572189331, "learning_rate": 2.8158620689655174e-05, "loss": 0.1672, "step": 11260 }, { "epoch": 38.83103448275862, "grad_norm": 0.8230834007263184, "learning_rate": 2.815816091954023e-05, "loss": 0.1504, "step": 11261 }, { "epoch": 38.83448275862069, "grad_norm": 0.6209582090377808, "learning_rate": 2.8157701149425288e-05, "loss": 0.1349, "step": 11262 }, { "epoch": 38.83793103448276, "grad_norm": 1.9919519424438477, "learning_rate": 2.8157241379310347e-05, "loss": 0.1511, "step": 11263 }, { "epoch": 38.84137931034483, "grad_norm": 0.7193061113357544, "learning_rate": 2.8156781609195402e-05, "loss": 0.1433, "step": 11264 }, { "epoch": 38.8448275862069, "grad_norm": 1.1764130592346191, "learning_rate": 2.815632183908046e-05, "loss": 0.1382, "step": 11265 }, { "epoch": 38.84827586206897, "grad_norm": 2.513911485671997, "learning_rate": 2.8155862068965516e-05, "loss": 0.1382, "step": 11266 }, { "epoch": 38.851724137931036, "grad_norm": 0.906965970993042, "learning_rate": 2.8155402298850578e-05, "loss": 0.1118, "step": 11267 }, { "epoch": 38.855172413793106, "grad_norm": 0.846393883228302, "learning_rate": 2.8154942528735633e-05, "loss": 0.121, "step": 11268 }, { "epoch": 38.858620689655176, "grad_norm": 1.06280517578125, "learning_rate": 2.815448275862069e-05, "loss": 0.1765, "step": 11269 }, { "epoch": 38.86206896551724, "grad_norm": 1.3685685396194458, "learning_rate": 2.8154022988505747e-05, "loss": 0.1882, "step": 11270 }, { "epoch": 38.86551724137931, "grad_norm": 0.7835462689399719, "learning_rate": 2.8153563218390806e-05, "loss": 0.2189, "step": 11271 }, { "epoch": 38.86896551724138, "grad_norm": 0.5669861435890198, "learning_rate": 2.8153103448275865e-05, "loss": 0.1614, "step": 11272 }, { "epoch": 38.87241379310345, "grad_norm": 0.8797566294670105, "learning_rate": 2.815264367816092e-05, "loss": 0.1926, "step": 11273 }, { "epoch": 38.87586206896552, "grad_norm": 0.6453063488006592, "learning_rate": 2.8152183908045975e-05, "loss": 0.1725, "step": 11274 }, { "epoch": 38.87931034482759, "grad_norm": 0.6178017258644104, "learning_rate": 2.8151724137931037e-05, "loss": 0.1767, "step": 11275 }, { "epoch": 38.88275862068966, "grad_norm": 0.8502981662750244, "learning_rate": 2.8151264367816093e-05, "loss": 0.1924, "step": 11276 }, { "epoch": 38.88620689655173, "grad_norm": 0.5046601295471191, "learning_rate": 2.8150804597701148e-05, "loss": 0.1677, "step": 11277 }, { "epoch": 38.889655172413796, "grad_norm": 0.8824790716171265, "learning_rate": 2.8150344827586207e-05, "loss": 0.1649, "step": 11278 }, { "epoch": 38.89310344827586, "grad_norm": 0.547552764415741, "learning_rate": 2.8149885057471265e-05, "loss": 0.169, "step": 11279 }, { "epoch": 38.89655172413793, "grad_norm": 0.5297574400901794, "learning_rate": 2.8149425287356324e-05, "loss": 0.1518, "step": 11280 }, { "epoch": 38.9, "grad_norm": 0.6053862571716309, "learning_rate": 2.814896551724138e-05, "loss": 0.1612, "step": 11281 }, { "epoch": 38.90344827586207, "grad_norm": 0.7442946434020996, "learning_rate": 2.8148505747126435e-05, "loss": 0.1471, "step": 11282 }, { "epoch": 38.90689655172414, "grad_norm": 0.6418216228485107, "learning_rate": 2.8148045977011497e-05, "loss": 0.1492, "step": 11283 }, { "epoch": 38.91034482758621, "grad_norm": 0.5186421275138855, "learning_rate": 2.8147586206896552e-05, "loss": 0.1468, "step": 11284 }, { "epoch": 38.91379310344828, "grad_norm": 0.6584075093269348, "learning_rate": 2.814712643678161e-05, "loss": 0.1599, "step": 11285 }, { "epoch": 38.91724137931035, "grad_norm": 0.5549549460411072, "learning_rate": 2.8146666666666666e-05, "loss": 0.1358, "step": 11286 }, { "epoch": 38.92068965517242, "grad_norm": 0.79872065782547, "learning_rate": 2.8146206896551725e-05, "loss": 0.1275, "step": 11287 }, { "epoch": 38.92413793103448, "grad_norm": 0.7759601473808289, "learning_rate": 2.8145747126436783e-05, "loss": 0.1221, "step": 11288 }, { "epoch": 38.92758620689655, "grad_norm": 1.0268313884735107, "learning_rate": 2.814528735632184e-05, "loss": 0.1338, "step": 11289 }, { "epoch": 38.93103448275862, "grad_norm": 0.8918996453285217, "learning_rate": 2.8144827586206897e-05, "loss": 0.1302, "step": 11290 }, { "epoch": 38.93448275862069, "grad_norm": 0.6283668279647827, "learning_rate": 2.8144367816091956e-05, "loss": 0.1472, "step": 11291 }, { "epoch": 38.93793103448276, "grad_norm": 0.7148279547691345, "learning_rate": 2.814390804597701e-05, "loss": 0.1096, "step": 11292 }, { "epoch": 38.94137931034483, "grad_norm": 0.8609217405319214, "learning_rate": 2.814344827586207e-05, "loss": 0.1181, "step": 11293 }, { "epoch": 38.9448275862069, "grad_norm": 2.1705758571624756, "learning_rate": 2.8142988505747125e-05, "loss": 0.1173, "step": 11294 }, { "epoch": 38.94827586206897, "grad_norm": 1.1649090051651, "learning_rate": 2.8142528735632187e-05, "loss": 0.1902, "step": 11295 }, { "epoch": 38.95172413793104, "grad_norm": 0.7025328874588013, "learning_rate": 2.8142068965517243e-05, "loss": 0.1935, "step": 11296 }, { "epoch": 38.9551724137931, "grad_norm": 0.7072032690048218, "learning_rate": 2.8141609195402298e-05, "loss": 0.17, "step": 11297 }, { "epoch": 38.95862068965517, "grad_norm": 0.5120487809181213, "learning_rate": 2.8141149425287357e-05, "loss": 0.1778, "step": 11298 }, { "epoch": 38.96206896551724, "grad_norm": 0.7953117489814758, "learning_rate": 2.8140689655172415e-05, "loss": 0.1643, "step": 11299 }, { "epoch": 38.96551724137931, "grad_norm": 1.2818915843963623, "learning_rate": 2.8140229885057474e-05, "loss": 0.1648, "step": 11300 }, { "epoch": 38.96896551724138, "grad_norm": 0.9333861470222473, "learning_rate": 2.813977011494253e-05, "loss": 0.1882, "step": 11301 }, { "epoch": 38.97241379310345, "grad_norm": 0.685040295124054, "learning_rate": 2.8139310344827585e-05, "loss": 0.1538, "step": 11302 }, { "epoch": 38.97586206896552, "grad_norm": 0.8104397058486938, "learning_rate": 2.8138850574712647e-05, "loss": 0.1505, "step": 11303 }, { "epoch": 38.97931034482759, "grad_norm": 0.7120717763900757, "learning_rate": 2.8138390804597702e-05, "loss": 0.1573, "step": 11304 }, { "epoch": 38.98275862068966, "grad_norm": 0.8509507179260254, "learning_rate": 2.813793103448276e-05, "loss": 0.1634, "step": 11305 }, { "epoch": 38.98620689655172, "grad_norm": 1.565614104270935, "learning_rate": 2.8137471264367816e-05, "loss": 0.1192, "step": 11306 }, { "epoch": 38.98965517241379, "grad_norm": 1.0979952812194824, "learning_rate": 2.8137011494252875e-05, "loss": 0.1307, "step": 11307 }, { "epoch": 38.99310344827586, "grad_norm": 0.9961676001548767, "learning_rate": 2.8136551724137933e-05, "loss": 0.1287, "step": 11308 }, { "epoch": 38.99655172413793, "grad_norm": 1.2895593643188477, "learning_rate": 2.813609195402299e-05, "loss": 0.1135, "step": 11309 }, { "epoch": 39.0, "grad_norm": 1.1720943450927734, "learning_rate": 2.8135632183908044e-05, "loss": 0.1684, "step": 11310 }, { "epoch": 39.00344827586207, "grad_norm": 1.0253268480300903, "learning_rate": 2.8135172413793106e-05, "loss": 0.2464, "step": 11311 }, { "epoch": 39.00689655172414, "grad_norm": 0.639373242855072, "learning_rate": 2.813471264367816e-05, "loss": 0.1996, "step": 11312 }, { "epoch": 39.01034482758621, "grad_norm": 1.7809005975723267, "learning_rate": 2.813425287356322e-05, "loss": 0.1662, "step": 11313 }, { "epoch": 39.01379310344828, "grad_norm": 0.6196741461753845, "learning_rate": 2.8133793103448275e-05, "loss": 0.1673, "step": 11314 }, { "epoch": 39.01724137931034, "grad_norm": 0.8031917214393616, "learning_rate": 2.8133333333333334e-05, "loss": 0.1571, "step": 11315 }, { "epoch": 39.02068965517241, "grad_norm": 0.6652750968933105, "learning_rate": 2.8132873563218393e-05, "loss": 0.1812, "step": 11316 }, { "epoch": 39.02413793103448, "grad_norm": 0.6250159740447998, "learning_rate": 2.8132413793103448e-05, "loss": 0.1665, "step": 11317 }, { "epoch": 39.02758620689655, "grad_norm": 0.5972087383270264, "learning_rate": 2.8131954022988507e-05, "loss": 0.1731, "step": 11318 }, { "epoch": 39.03103448275862, "grad_norm": 1.7323660850524902, "learning_rate": 2.8131494252873565e-05, "loss": 0.1429, "step": 11319 }, { "epoch": 39.03448275862069, "grad_norm": 0.7930275797843933, "learning_rate": 2.813103448275862e-05, "loss": 0.1575, "step": 11320 }, { "epoch": 39.03793103448276, "grad_norm": 0.691792905330658, "learning_rate": 2.813057471264368e-05, "loss": 0.1434, "step": 11321 }, { "epoch": 39.04137931034483, "grad_norm": 0.5114172101020813, "learning_rate": 2.8130114942528735e-05, "loss": 0.1478, "step": 11322 }, { "epoch": 39.0448275862069, "grad_norm": 0.8113369941711426, "learning_rate": 2.8129655172413797e-05, "loss": 0.1446, "step": 11323 }, { "epoch": 39.04827586206896, "grad_norm": 0.5466988682746887, "learning_rate": 2.8129195402298852e-05, "loss": 0.1445, "step": 11324 }, { "epoch": 39.05172413793103, "grad_norm": 0.8558183312416077, "learning_rate": 2.8128735632183907e-05, "loss": 0.1358, "step": 11325 }, { "epoch": 39.0551724137931, "grad_norm": 0.6201282739639282, "learning_rate": 2.8128275862068966e-05, "loss": 0.1212, "step": 11326 }, { "epoch": 39.05862068965517, "grad_norm": 0.5900123119354248, "learning_rate": 2.8127816091954025e-05, "loss": 0.1322, "step": 11327 }, { "epoch": 39.06206896551724, "grad_norm": 0.5589483976364136, "learning_rate": 2.8127356321839083e-05, "loss": 0.1284, "step": 11328 }, { "epoch": 39.06551724137931, "grad_norm": 1.0415207147598267, "learning_rate": 2.812689655172414e-05, "loss": 0.1082, "step": 11329 }, { "epoch": 39.06896551724138, "grad_norm": 1.2627681493759155, "learning_rate": 2.8126436781609194e-05, "loss": 0.1178, "step": 11330 }, { "epoch": 39.07241379310345, "grad_norm": 0.8590753078460693, "learning_rate": 2.8125977011494256e-05, "loss": 0.1139, "step": 11331 }, { "epoch": 39.07586206896552, "grad_norm": 0.6881011128425598, "learning_rate": 2.812551724137931e-05, "loss": 0.0885, "step": 11332 }, { "epoch": 39.07931034482758, "grad_norm": 0.6686098575592041, "learning_rate": 2.812505747126437e-05, "loss": 0.0989, "step": 11333 }, { "epoch": 39.08275862068965, "grad_norm": 1.1820886135101318, "learning_rate": 2.8124597701149425e-05, "loss": 0.1161, "step": 11334 }, { "epoch": 39.08620689655172, "grad_norm": 1.5159265995025635, "learning_rate": 2.8124137931034484e-05, "loss": 0.1628, "step": 11335 }, { "epoch": 39.08965517241379, "grad_norm": 0.8943869471549988, "learning_rate": 2.8123678160919543e-05, "loss": 0.2254, "step": 11336 }, { "epoch": 39.09310344827586, "grad_norm": 0.6968327760696411, "learning_rate": 2.8123218390804598e-05, "loss": 0.2095, "step": 11337 }, { "epoch": 39.09655172413793, "grad_norm": 1.3079099655151367, "learning_rate": 2.8122758620689653e-05, "loss": 0.1567, "step": 11338 }, { "epoch": 39.1, "grad_norm": 1.5072187185287476, "learning_rate": 2.8122298850574715e-05, "loss": 0.1649, "step": 11339 }, { "epoch": 39.10344827586207, "grad_norm": 0.6269375085830688, "learning_rate": 2.812183908045977e-05, "loss": 0.1536, "step": 11340 }, { "epoch": 39.10689655172414, "grad_norm": 0.9921271204948425, "learning_rate": 2.812137931034483e-05, "loss": 0.175, "step": 11341 }, { "epoch": 39.110344827586204, "grad_norm": 1.8879188299179077, "learning_rate": 2.8120919540229884e-05, "loss": 0.167, "step": 11342 }, { "epoch": 39.11379310344827, "grad_norm": 0.7134392261505127, "learning_rate": 2.8120459770114943e-05, "loss": 0.1619, "step": 11343 }, { "epoch": 39.11724137931034, "grad_norm": 0.651421844959259, "learning_rate": 2.8120000000000002e-05, "loss": 0.1367, "step": 11344 }, { "epoch": 39.12068965517241, "grad_norm": 0.7347873449325562, "learning_rate": 2.8119540229885057e-05, "loss": 0.1416, "step": 11345 }, { "epoch": 39.12413793103448, "grad_norm": 0.6486946940422058, "learning_rate": 2.8119080459770116e-05, "loss": 0.1481, "step": 11346 }, { "epoch": 39.12758620689655, "grad_norm": 1.0482981204986572, "learning_rate": 2.8118620689655174e-05, "loss": 0.1384, "step": 11347 }, { "epoch": 39.13103448275862, "grad_norm": 1.5896856784820557, "learning_rate": 2.811816091954023e-05, "loss": 0.148, "step": 11348 }, { "epoch": 39.13448275862069, "grad_norm": 0.5301230549812317, "learning_rate": 2.811770114942529e-05, "loss": 0.1105, "step": 11349 }, { "epoch": 39.13793103448276, "grad_norm": 0.8420941233634949, "learning_rate": 2.8117241379310344e-05, "loss": 0.1358, "step": 11350 }, { "epoch": 39.141379310344824, "grad_norm": 1.6486687660217285, "learning_rate": 2.8116781609195406e-05, "loss": 0.1194, "step": 11351 }, { "epoch": 39.144827586206894, "grad_norm": 0.5849422812461853, "learning_rate": 2.811632183908046e-05, "loss": 0.1264, "step": 11352 }, { "epoch": 39.148275862068964, "grad_norm": 0.7122141718864441, "learning_rate": 2.8115862068965516e-05, "loss": 0.1453, "step": 11353 }, { "epoch": 39.15172413793103, "grad_norm": 1.420987844467163, "learning_rate": 2.8115402298850575e-05, "loss": 0.1343, "step": 11354 }, { "epoch": 39.1551724137931, "grad_norm": 2.1680684089660645, "learning_rate": 2.8114942528735634e-05, "loss": 0.1274, "step": 11355 }, { "epoch": 39.15862068965517, "grad_norm": 0.5910596251487732, "learning_rate": 2.8114482758620692e-05, "loss": 0.1037, "step": 11356 }, { "epoch": 39.16206896551724, "grad_norm": 1.0430805683135986, "learning_rate": 2.8114022988505748e-05, "loss": 0.1164, "step": 11357 }, { "epoch": 39.16551724137931, "grad_norm": 1.087209939956665, "learning_rate": 2.8113563218390803e-05, "loss": 0.092, "step": 11358 }, { "epoch": 39.16896551724138, "grad_norm": 0.7241266965866089, "learning_rate": 2.8113103448275865e-05, "loss": 0.1064, "step": 11359 }, { "epoch": 39.172413793103445, "grad_norm": 2.1909291744232178, "learning_rate": 2.811264367816092e-05, "loss": 0.1136, "step": 11360 }, { "epoch": 39.175862068965515, "grad_norm": 0.7654843926429749, "learning_rate": 2.811218390804598e-05, "loss": 0.2248, "step": 11361 }, { "epoch": 39.179310344827584, "grad_norm": 1.2009152173995972, "learning_rate": 2.8111724137931034e-05, "loss": 0.1706, "step": 11362 }, { "epoch": 39.182758620689654, "grad_norm": 0.9743526577949524, "learning_rate": 2.8111264367816093e-05, "loss": 0.1729, "step": 11363 }, { "epoch": 39.186206896551724, "grad_norm": 0.7971171140670776, "learning_rate": 2.8110804597701152e-05, "loss": 0.154, "step": 11364 }, { "epoch": 39.189655172413794, "grad_norm": 0.5320194363594055, "learning_rate": 2.8110344827586207e-05, "loss": 0.1562, "step": 11365 }, { "epoch": 39.19310344827586, "grad_norm": 1.0084235668182373, "learning_rate": 2.8109885057471262e-05, "loss": 0.1559, "step": 11366 }, { "epoch": 39.19655172413793, "grad_norm": 0.5684787034988403, "learning_rate": 2.8109425287356324e-05, "loss": 0.1559, "step": 11367 }, { "epoch": 39.2, "grad_norm": 0.8705762624740601, "learning_rate": 2.810896551724138e-05, "loss": 0.1604, "step": 11368 }, { "epoch": 39.203448275862065, "grad_norm": 1.0085281133651733, "learning_rate": 2.810850574712644e-05, "loss": 0.1464, "step": 11369 }, { "epoch": 39.206896551724135, "grad_norm": 0.5042669773101807, "learning_rate": 2.8108045977011494e-05, "loss": 0.1336, "step": 11370 }, { "epoch": 39.210344827586205, "grad_norm": 0.614955484867096, "learning_rate": 2.8107586206896552e-05, "loss": 0.1266, "step": 11371 }, { "epoch": 39.213793103448275, "grad_norm": 1.8639941215515137, "learning_rate": 2.810712643678161e-05, "loss": 0.176, "step": 11372 }, { "epoch": 39.217241379310344, "grad_norm": 0.504321277141571, "learning_rate": 2.8106666666666666e-05, "loss": 0.1206, "step": 11373 }, { "epoch": 39.220689655172414, "grad_norm": 0.8796669840812683, "learning_rate": 2.8106206896551725e-05, "loss": 0.1282, "step": 11374 }, { "epoch": 39.224137931034484, "grad_norm": 0.7915198802947998, "learning_rate": 2.8105747126436784e-05, "loss": 0.1222, "step": 11375 }, { "epoch": 39.227586206896554, "grad_norm": 0.6976243257522583, "learning_rate": 2.810528735632184e-05, "loss": 0.1445, "step": 11376 }, { "epoch": 39.23103448275862, "grad_norm": 0.7113633751869202, "learning_rate": 2.8104827586206898e-05, "loss": 0.1292, "step": 11377 }, { "epoch": 39.234482758620686, "grad_norm": 1.2345213890075684, "learning_rate": 2.8104367816091953e-05, "loss": 0.1336, "step": 11378 }, { "epoch": 39.237931034482756, "grad_norm": 0.7949514389038086, "learning_rate": 2.8103908045977015e-05, "loss": 0.1245, "step": 11379 }, { "epoch": 39.241379310344826, "grad_norm": 0.6540402173995972, "learning_rate": 2.810344827586207e-05, "loss": 0.1139, "step": 11380 }, { "epoch": 39.244827586206895, "grad_norm": 0.9986185431480408, "learning_rate": 2.8102988505747126e-05, "loss": 0.124, "step": 11381 }, { "epoch": 39.248275862068965, "grad_norm": 0.6179977059364319, "learning_rate": 2.8102528735632184e-05, "loss": 0.1214, "step": 11382 }, { "epoch": 39.251724137931035, "grad_norm": 2.956439733505249, "learning_rate": 2.8102068965517243e-05, "loss": 0.1219, "step": 11383 }, { "epoch": 39.255172413793105, "grad_norm": 2.9646363258361816, "learning_rate": 2.8101609195402302e-05, "loss": 0.108, "step": 11384 }, { "epoch": 39.258620689655174, "grad_norm": 1.1277996301651, "learning_rate": 2.8101149425287357e-05, "loss": 0.1663, "step": 11385 }, { "epoch": 39.262068965517244, "grad_norm": 0.8954567313194275, "learning_rate": 2.8100689655172412e-05, "loss": 0.2158, "step": 11386 }, { "epoch": 39.265517241379314, "grad_norm": 0.5649714469909668, "learning_rate": 2.8100229885057474e-05, "loss": 0.1986, "step": 11387 }, { "epoch": 39.26896551724138, "grad_norm": 0.4658292233943939, "learning_rate": 2.809977011494253e-05, "loss": 0.176, "step": 11388 }, { "epoch": 39.272413793103446, "grad_norm": 0.6606435179710388, "learning_rate": 2.809931034482759e-05, "loss": 0.157, "step": 11389 }, { "epoch": 39.275862068965516, "grad_norm": 0.6109241247177124, "learning_rate": 2.8098850574712644e-05, "loss": 0.1645, "step": 11390 }, { "epoch": 39.279310344827586, "grad_norm": 0.71871417760849, "learning_rate": 2.8098390804597702e-05, "loss": 0.1765, "step": 11391 }, { "epoch": 39.282758620689656, "grad_norm": 0.9430921077728271, "learning_rate": 2.809793103448276e-05, "loss": 0.1626, "step": 11392 }, { "epoch": 39.286206896551725, "grad_norm": 1.2927865982055664, "learning_rate": 2.8097471264367816e-05, "loss": 0.1439, "step": 11393 }, { "epoch": 39.289655172413795, "grad_norm": 0.7112196683883667, "learning_rate": 2.8097011494252875e-05, "loss": 0.131, "step": 11394 }, { "epoch": 39.293103448275865, "grad_norm": 0.6491581201553345, "learning_rate": 2.8096551724137934e-05, "loss": 0.1389, "step": 11395 }, { "epoch": 39.296551724137935, "grad_norm": 0.8715078830718994, "learning_rate": 2.809609195402299e-05, "loss": 0.1482, "step": 11396 }, { "epoch": 39.3, "grad_norm": 1.048891544342041, "learning_rate": 2.8095632183908048e-05, "loss": 0.1618, "step": 11397 }, { "epoch": 39.30344827586207, "grad_norm": 1.1776371002197266, "learning_rate": 2.8095172413793103e-05, "loss": 0.1521, "step": 11398 }, { "epoch": 39.30689655172414, "grad_norm": 1.1293679475784302, "learning_rate": 2.809471264367816e-05, "loss": 0.143, "step": 11399 }, { "epoch": 39.310344827586206, "grad_norm": 0.6758058071136475, "learning_rate": 2.809425287356322e-05, "loss": 0.1583, "step": 11400 }, { "epoch": 39.313793103448276, "grad_norm": 2.2403564453125, "learning_rate": 2.8093793103448276e-05, "loss": 0.1384, "step": 11401 }, { "epoch": 39.317241379310346, "grad_norm": 0.687269926071167, "learning_rate": 2.8093333333333334e-05, "loss": 0.1387, "step": 11402 }, { "epoch": 39.320689655172416, "grad_norm": 1.706756830215454, "learning_rate": 2.8092873563218393e-05, "loss": 0.1237, "step": 11403 }, { "epoch": 39.324137931034485, "grad_norm": 1.2068983316421509, "learning_rate": 2.8092413793103448e-05, "loss": 0.1263, "step": 11404 }, { "epoch": 39.327586206896555, "grad_norm": 0.5400568842887878, "learning_rate": 2.8091954022988507e-05, "loss": 0.1114, "step": 11405 }, { "epoch": 39.33103448275862, "grad_norm": 1.2803703546524048, "learning_rate": 2.8091494252873562e-05, "loss": 0.1056, "step": 11406 }, { "epoch": 39.33448275862069, "grad_norm": 0.6489802002906799, "learning_rate": 2.8091034482758624e-05, "loss": 0.1099, "step": 11407 }, { "epoch": 39.33793103448276, "grad_norm": 0.6319431066513062, "learning_rate": 2.809057471264368e-05, "loss": 0.1008, "step": 11408 }, { "epoch": 39.34137931034483, "grad_norm": 0.856367826461792, "learning_rate": 2.8090114942528735e-05, "loss": 0.1146, "step": 11409 }, { "epoch": 39.3448275862069, "grad_norm": 1.344766616821289, "learning_rate": 2.8089655172413794e-05, "loss": 0.1619, "step": 11410 }, { "epoch": 39.34827586206897, "grad_norm": 0.6225053668022156, "learning_rate": 2.8089195402298852e-05, "loss": 0.2241, "step": 11411 }, { "epoch": 39.351724137931036, "grad_norm": 0.4927051067352295, "learning_rate": 2.808873563218391e-05, "loss": 0.1713, "step": 11412 }, { "epoch": 39.355172413793106, "grad_norm": 0.4878547191619873, "learning_rate": 2.8088275862068966e-05, "loss": 0.1898, "step": 11413 }, { "epoch": 39.358620689655176, "grad_norm": 0.453934907913208, "learning_rate": 2.808781609195402e-05, "loss": 0.1788, "step": 11414 }, { "epoch": 39.36206896551724, "grad_norm": 0.5316328406333923, "learning_rate": 2.8087356321839084e-05, "loss": 0.1842, "step": 11415 }, { "epoch": 39.36551724137931, "grad_norm": 1.2046620845794678, "learning_rate": 2.808689655172414e-05, "loss": 0.139, "step": 11416 }, { "epoch": 39.36896551724138, "grad_norm": 0.4694715142250061, "learning_rate": 2.8086436781609198e-05, "loss": 0.1581, "step": 11417 }, { "epoch": 39.37241379310345, "grad_norm": 0.6577185392379761, "learning_rate": 2.8085977011494253e-05, "loss": 0.1464, "step": 11418 }, { "epoch": 39.37586206896552, "grad_norm": 1.0235077142715454, "learning_rate": 2.808551724137931e-05, "loss": 0.1544, "step": 11419 }, { "epoch": 39.37931034482759, "grad_norm": 1.173883318901062, "learning_rate": 2.808505747126437e-05, "loss": 0.1544, "step": 11420 }, { "epoch": 39.38275862068966, "grad_norm": 0.7688548564910889, "learning_rate": 2.8084597701149426e-05, "loss": 0.1423, "step": 11421 }, { "epoch": 39.38620689655173, "grad_norm": 0.7255688309669495, "learning_rate": 2.8084137931034484e-05, "loss": 0.1305, "step": 11422 }, { "epoch": 39.389655172413796, "grad_norm": 1.0433151721954346, "learning_rate": 2.8083678160919543e-05, "loss": 0.1377, "step": 11423 }, { "epoch": 39.39310344827586, "grad_norm": 0.4874139726161957, "learning_rate": 2.8083218390804598e-05, "loss": 0.1235, "step": 11424 }, { "epoch": 39.39655172413793, "grad_norm": 0.7005029320716858, "learning_rate": 2.8082758620689657e-05, "loss": 0.1524, "step": 11425 }, { "epoch": 39.4, "grad_norm": 0.920814573764801, "learning_rate": 2.8082298850574712e-05, "loss": 0.1193, "step": 11426 }, { "epoch": 39.40344827586207, "grad_norm": 0.5916476249694824, "learning_rate": 2.808183908045977e-05, "loss": 0.1169, "step": 11427 }, { "epoch": 39.40689655172414, "grad_norm": 1.0757548809051514, "learning_rate": 2.808137931034483e-05, "loss": 0.1067, "step": 11428 }, { "epoch": 39.41034482758621, "grad_norm": 0.9174897074699402, "learning_rate": 2.8080919540229885e-05, "loss": 0.104, "step": 11429 }, { "epoch": 39.41379310344828, "grad_norm": 0.9467368721961975, "learning_rate": 2.8080459770114944e-05, "loss": 0.1335, "step": 11430 }, { "epoch": 39.41724137931035, "grad_norm": 0.710720419883728, "learning_rate": 2.8080000000000002e-05, "loss": 0.1208, "step": 11431 }, { "epoch": 39.42068965517242, "grad_norm": 0.6420710682868958, "learning_rate": 2.8079540229885057e-05, "loss": 0.1098, "step": 11432 }, { "epoch": 39.42413793103448, "grad_norm": 1.1723577976226807, "learning_rate": 2.8079080459770116e-05, "loss": 0.1107, "step": 11433 }, { "epoch": 39.42758620689655, "grad_norm": 1.0184645652770996, "learning_rate": 2.807862068965517e-05, "loss": 0.1288, "step": 11434 }, { "epoch": 39.43103448275862, "grad_norm": 2.0558419227600098, "learning_rate": 2.8078160919540234e-05, "loss": 0.1802, "step": 11435 }, { "epoch": 39.43448275862069, "grad_norm": 0.6640236973762512, "learning_rate": 2.807770114942529e-05, "loss": 0.2107, "step": 11436 }, { "epoch": 39.43793103448276, "grad_norm": 0.520627498626709, "learning_rate": 2.8077241379310344e-05, "loss": 0.1906, "step": 11437 }, { "epoch": 39.44137931034483, "grad_norm": 0.627945065498352, "learning_rate": 2.8076781609195403e-05, "loss": 0.1929, "step": 11438 }, { "epoch": 39.4448275862069, "grad_norm": 0.6437147855758667, "learning_rate": 2.807632183908046e-05, "loss": 0.1545, "step": 11439 }, { "epoch": 39.44827586206897, "grad_norm": 0.6573736667633057, "learning_rate": 2.807586206896552e-05, "loss": 0.1771, "step": 11440 }, { "epoch": 39.45172413793104, "grad_norm": 0.9056336283683777, "learning_rate": 2.8075402298850575e-05, "loss": 0.1801, "step": 11441 }, { "epoch": 39.4551724137931, "grad_norm": 0.6939224600791931, "learning_rate": 2.807494252873563e-05, "loss": 0.1693, "step": 11442 }, { "epoch": 39.45862068965517, "grad_norm": 0.6820400357246399, "learning_rate": 2.8074482758620693e-05, "loss": 0.1579, "step": 11443 }, { "epoch": 39.46206896551724, "grad_norm": 0.7561465501785278, "learning_rate": 2.8074022988505748e-05, "loss": 0.1601, "step": 11444 }, { "epoch": 39.46551724137931, "grad_norm": 0.6759411692619324, "learning_rate": 2.8073563218390807e-05, "loss": 0.1404, "step": 11445 }, { "epoch": 39.46896551724138, "grad_norm": 0.7630171775817871, "learning_rate": 2.8073103448275862e-05, "loss": 0.1649, "step": 11446 }, { "epoch": 39.47241379310345, "grad_norm": 0.9129201769828796, "learning_rate": 2.8072643678160917e-05, "loss": 0.151, "step": 11447 }, { "epoch": 39.47586206896552, "grad_norm": 1.050868034362793, "learning_rate": 2.807218390804598e-05, "loss": 0.1401, "step": 11448 }, { "epoch": 39.47931034482759, "grad_norm": 0.9194563031196594, "learning_rate": 2.8071724137931035e-05, "loss": 0.1595, "step": 11449 }, { "epoch": 39.48275862068966, "grad_norm": 0.6321210861206055, "learning_rate": 2.8071264367816093e-05, "loss": 0.134, "step": 11450 }, { "epoch": 39.48620689655172, "grad_norm": 5.3997721672058105, "learning_rate": 2.807080459770115e-05, "loss": 0.1179, "step": 11451 }, { "epoch": 39.48965517241379, "grad_norm": 0.8643282651901245, "learning_rate": 2.8070344827586207e-05, "loss": 0.1468, "step": 11452 }, { "epoch": 39.49310344827586, "grad_norm": 0.9580731391906738, "learning_rate": 2.8069885057471266e-05, "loss": 0.1253, "step": 11453 }, { "epoch": 39.49655172413793, "grad_norm": 0.7964051961898804, "learning_rate": 2.806942528735632e-05, "loss": 0.1304, "step": 11454 }, { "epoch": 39.5, "grad_norm": 1.4830408096313477, "learning_rate": 2.8068965517241377e-05, "loss": 0.1212, "step": 11455 }, { "epoch": 39.50344827586207, "grad_norm": 0.704849362373352, "learning_rate": 2.806850574712644e-05, "loss": 0.1382, "step": 11456 }, { "epoch": 39.50689655172414, "grad_norm": 0.8388257622718811, "learning_rate": 2.8068045977011494e-05, "loss": 0.1024, "step": 11457 }, { "epoch": 39.51034482758621, "grad_norm": 1.078876256942749, "learning_rate": 2.8067586206896553e-05, "loss": 0.1232, "step": 11458 }, { "epoch": 39.51379310344828, "grad_norm": 0.810330331325531, "learning_rate": 2.8067126436781608e-05, "loss": 0.134, "step": 11459 }, { "epoch": 39.51724137931034, "grad_norm": 1.557500958442688, "learning_rate": 2.8066666666666667e-05, "loss": 0.1544, "step": 11460 }, { "epoch": 39.52068965517241, "grad_norm": 0.7689520716667175, "learning_rate": 2.8066206896551725e-05, "loss": 0.2147, "step": 11461 }, { "epoch": 39.52413793103448, "grad_norm": 0.766836404800415, "learning_rate": 2.806574712643678e-05, "loss": 0.1814, "step": 11462 }, { "epoch": 39.52758620689655, "grad_norm": 0.7409343123435974, "learning_rate": 2.806528735632184e-05, "loss": 0.1986, "step": 11463 }, { "epoch": 39.53103448275862, "grad_norm": 0.6706486940383911, "learning_rate": 2.8064827586206898e-05, "loss": 0.1728, "step": 11464 }, { "epoch": 39.53448275862069, "grad_norm": 0.7322885394096375, "learning_rate": 2.8064367816091953e-05, "loss": 0.1596, "step": 11465 }, { "epoch": 39.53793103448276, "grad_norm": 0.8368232846260071, "learning_rate": 2.8063908045977012e-05, "loss": 0.1491, "step": 11466 }, { "epoch": 39.54137931034483, "grad_norm": 0.537189245223999, "learning_rate": 2.8063448275862067e-05, "loss": 0.1282, "step": 11467 }, { "epoch": 39.5448275862069, "grad_norm": 0.7506726980209351, "learning_rate": 2.806298850574713e-05, "loss": 0.1538, "step": 11468 }, { "epoch": 39.54827586206896, "grad_norm": 1.7588565349578857, "learning_rate": 2.8062528735632185e-05, "loss": 0.1761, "step": 11469 }, { "epoch": 39.55172413793103, "grad_norm": 0.7439810633659363, "learning_rate": 2.806206896551724e-05, "loss": 0.1501, "step": 11470 }, { "epoch": 39.5551724137931, "grad_norm": 1.13484787940979, "learning_rate": 2.80616091954023e-05, "loss": 0.1675, "step": 11471 }, { "epoch": 39.55862068965517, "grad_norm": 0.8381078243255615, "learning_rate": 2.8061149425287357e-05, "loss": 0.1375, "step": 11472 }, { "epoch": 39.56206896551724, "grad_norm": 0.8601353764533997, "learning_rate": 2.8060689655172416e-05, "loss": 0.127, "step": 11473 }, { "epoch": 39.56551724137931, "grad_norm": 0.8690395951271057, "learning_rate": 2.806022988505747e-05, "loss": 0.1342, "step": 11474 }, { "epoch": 39.56896551724138, "grad_norm": 0.6036515235900879, "learning_rate": 2.8059770114942527e-05, "loss": 0.1372, "step": 11475 }, { "epoch": 39.57241379310345, "grad_norm": 0.6451120972633362, "learning_rate": 2.805931034482759e-05, "loss": 0.1363, "step": 11476 }, { "epoch": 39.57586206896552, "grad_norm": 0.5364387631416321, "learning_rate": 2.8058850574712644e-05, "loss": 0.1125, "step": 11477 }, { "epoch": 39.57931034482758, "grad_norm": 0.6387764811515808, "learning_rate": 2.8058390804597703e-05, "loss": 0.1368, "step": 11478 }, { "epoch": 39.58275862068965, "grad_norm": 0.7233579158782959, "learning_rate": 2.8057931034482758e-05, "loss": 0.1279, "step": 11479 }, { "epoch": 39.58620689655172, "grad_norm": 0.6326417326927185, "learning_rate": 2.8057471264367817e-05, "loss": 0.1115, "step": 11480 }, { "epoch": 39.58965517241379, "grad_norm": 0.8463090062141418, "learning_rate": 2.8057011494252875e-05, "loss": 0.1636, "step": 11481 }, { "epoch": 39.59310344827586, "grad_norm": 0.6153095364570618, "learning_rate": 2.805655172413793e-05, "loss": 0.1024, "step": 11482 }, { "epoch": 39.59655172413793, "grad_norm": 0.5719016790390015, "learning_rate": 2.805609195402299e-05, "loss": 0.1013, "step": 11483 }, { "epoch": 39.6, "grad_norm": 0.774377703666687, "learning_rate": 2.8055632183908048e-05, "loss": 0.1067, "step": 11484 }, { "epoch": 39.60344827586207, "grad_norm": 1.4463481903076172, "learning_rate": 2.8055172413793103e-05, "loss": 0.1607, "step": 11485 }, { "epoch": 39.60689655172414, "grad_norm": 1.2848585844039917, "learning_rate": 2.8054712643678162e-05, "loss": 0.2361, "step": 11486 }, { "epoch": 39.610344827586204, "grad_norm": 0.6632060408592224, "learning_rate": 2.8054252873563217e-05, "loss": 0.189, "step": 11487 }, { "epoch": 39.61379310344827, "grad_norm": 0.523815929889679, "learning_rate": 2.8053793103448276e-05, "loss": 0.1843, "step": 11488 }, { "epoch": 39.61724137931034, "grad_norm": 0.5185151100158691, "learning_rate": 2.8053333333333335e-05, "loss": 0.1866, "step": 11489 }, { "epoch": 39.62068965517241, "grad_norm": 0.7114824056625366, "learning_rate": 2.805287356321839e-05, "loss": 0.164, "step": 11490 }, { "epoch": 39.62413793103448, "grad_norm": 0.8949483633041382, "learning_rate": 2.805241379310345e-05, "loss": 0.1449, "step": 11491 }, { "epoch": 39.62758620689655, "grad_norm": 0.8068279027938843, "learning_rate": 2.8051954022988507e-05, "loss": 0.1588, "step": 11492 }, { "epoch": 39.63103448275862, "grad_norm": 0.6915461421012878, "learning_rate": 2.8051494252873563e-05, "loss": 0.1476, "step": 11493 }, { "epoch": 39.63448275862069, "grad_norm": 1.9260233640670776, "learning_rate": 2.805103448275862e-05, "loss": 0.133, "step": 11494 }, { "epoch": 39.63793103448276, "grad_norm": 0.8284426331520081, "learning_rate": 2.8050574712643677e-05, "loss": 0.1522, "step": 11495 }, { "epoch": 39.641379310344824, "grad_norm": 0.6056115031242371, "learning_rate": 2.805011494252874e-05, "loss": 0.1417, "step": 11496 }, { "epoch": 39.644827586206894, "grad_norm": 0.6201438903808594, "learning_rate": 2.8049655172413794e-05, "loss": 0.1842, "step": 11497 }, { "epoch": 39.648275862068964, "grad_norm": 0.7673490643501282, "learning_rate": 2.804919540229885e-05, "loss": 0.1357, "step": 11498 }, { "epoch": 39.65172413793103, "grad_norm": 0.7154965996742249, "learning_rate": 2.8048735632183908e-05, "loss": 0.1425, "step": 11499 }, { "epoch": 39.6551724137931, "grad_norm": 1.130608320236206, "learning_rate": 2.8048275862068967e-05, "loss": 0.1604, "step": 11500 }, { "epoch": 39.65862068965517, "grad_norm": 1.0151304006576538, "learning_rate": 2.8047816091954025e-05, "loss": 0.142, "step": 11501 }, { "epoch": 39.66206896551724, "grad_norm": 0.6416845917701721, "learning_rate": 2.804735632183908e-05, "loss": 0.1101, "step": 11502 }, { "epoch": 39.66551724137931, "grad_norm": 0.7699695825576782, "learning_rate": 2.8046896551724136e-05, "loss": 0.1133, "step": 11503 }, { "epoch": 39.66896551724138, "grad_norm": 0.8104886412620544, "learning_rate": 2.8046436781609198e-05, "loss": 0.1232, "step": 11504 }, { "epoch": 39.672413793103445, "grad_norm": 0.553966760635376, "learning_rate": 2.8045977011494253e-05, "loss": 0.1005, "step": 11505 }, { "epoch": 39.675862068965515, "grad_norm": 1.0513066053390503, "learning_rate": 2.8045517241379312e-05, "loss": 0.1107, "step": 11506 }, { "epoch": 39.679310344827584, "grad_norm": 0.8021120429039001, "learning_rate": 2.8045057471264367e-05, "loss": 0.1073, "step": 11507 }, { "epoch": 39.682758620689654, "grad_norm": 0.7450590133666992, "learning_rate": 2.8044597701149426e-05, "loss": 0.109, "step": 11508 }, { "epoch": 39.686206896551724, "grad_norm": 1.2773017883300781, "learning_rate": 2.8044137931034485e-05, "loss": 0.1083, "step": 11509 }, { "epoch": 39.689655172413794, "grad_norm": 1.550302267074585, "learning_rate": 2.804367816091954e-05, "loss": 0.1349, "step": 11510 }, { "epoch": 39.69310344827586, "grad_norm": 0.984873354434967, "learning_rate": 2.80432183908046e-05, "loss": 0.2145, "step": 11511 }, { "epoch": 39.69655172413793, "grad_norm": 0.6027946472167969, "learning_rate": 2.8042758620689657e-05, "loss": 0.1714, "step": 11512 }, { "epoch": 39.7, "grad_norm": 0.580236554145813, "learning_rate": 2.8042298850574713e-05, "loss": 0.1769, "step": 11513 }, { "epoch": 39.703448275862065, "grad_norm": 0.5701853036880493, "learning_rate": 2.804183908045977e-05, "loss": 0.1754, "step": 11514 }, { "epoch": 39.706896551724135, "grad_norm": 0.6304711699485779, "learning_rate": 2.8041379310344827e-05, "loss": 0.2023, "step": 11515 }, { "epoch": 39.710344827586205, "grad_norm": 0.8770312666893005, "learning_rate": 2.8040919540229885e-05, "loss": 0.1734, "step": 11516 }, { "epoch": 39.713793103448275, "grad_norm": 1.2147611379623413, "learning_rate": 2.8040459770114944e-05, "loss": 0.1856, "step": 11517 }, { "epoch": 39.717241379310344, "grad_norm": 2.212428331375122, "learning_rate": 2.804e-05, "loss": 0.1781, "step": 11518 }, { "epoch": 39.720689655172414, "grad_norm": 0.8960394859313965, "learning_rate": 2.8039540229885058e-05, "loss": 0.1572, "step": 11519 }, { "epoch": 39.724137931034484, "grad_norm": 0.5783913731575012, "learning_rate": 2.8039080459770117e-05, "loss": 0.1532, "step": 11520 }, { "epoch": 39.727586206896554, "grad_norm": 0.7316312193870544, "learning_rate": 2.8038620689655172e-05, "loss": 0.1415, "step": 11521 }, { "epoch": 39.73103448275862, "grad_norm": 0.8523666858673096, "learning_rate": 2.803816091954023e-05, "loss": 0.1455, "step": 11522 }, { "epoch": 39.734482758620686, "grad_norm": 0.8479415774345398, "learning_rate": 2.8037701149425286e-05, "loss": 0.1312, "step": 11523 }, { "epoch": 39.737931034482756, "grad_norm": 2.0010015964508057, "learning_rate": 2.8037241379310348e-05, "loss": 0.1433, "step": 11524 }, { "epoch": 39.741379310344826, "grad_norm": 0.8007112145423889, "learning_rate": 2.8036781609195403e-05, "loss": 0.1605, "step": 11525 }, { "epoch": 39.744827586206895, "grad_norm": 0.9121559858322144, "learning_rate": 2.803632183908046e-05, "loss": 0.1324, "step": 11526 }, { "epoch": 39.748275862068965, "grad_norm": 0.8926279544830322, "learning_rate": 2.8035862068965517e-05, "loss": 0.125, "step": 11527 }, { "epoch": 39.751724137931035, "grad_norm": 0.7816924452781677, "learning_rate": 2.8035402298850576e-05, "loss": 0.1186, "step": 11528 }, { "epoch": 39.755172413793105, "grad_norm": 1.2233219146728516, "learning_rate": 2.8034942528735635e-05, "loss": 0.1261, "step": 11529 }, { "epoch": 39.758620689655174, "grad_norm": 0.7805418968200684, "learning_rate": 2.803448275862069e-05, "loss": 0.1014, "step": 11530 }, { "epoch": 39.762068965517244, "grad_norm": 1.45162034034729, "learning_rate": 2.8034022988505745e-05, "loss": 0.1391, "step": 11531 }, { "epoch": 39.765517241379314, "grad_norm": 1.2744754552841187, "learning_rate": 2.8033563218390807e-05, "loss": 0.1247, "step": 11532 }, { "epoch": 39.76896551724138, "grad_norm": 1.2398245334625244, "learning_rate": 2.8033103448275863e-05, "loss": 0.132, "step": 11533 }, { "epoch": 39.772413793103446, "grad_norm": 1.2867166996002197, "learning_rate": 2.803264367816092e-05, "loss": 0.1537, "step": 11534 }, { "epoch": 39.775862068965516, "grad_norm": 0.988305926322937, "learning_rate": 2.8032183908045976e-05, "loss": 0.1809, "step": 11535 }, { "epoch": 39.779310344827586, "grad_norm": 2.060601234436035, "learning_rate": 2.8031724137931035e-05, "loss": 0.2203, "step": 11536 }, { "epoch": 39.782758620689656, "grad_norm": 0.9540982246398926, "learning_rate": 2.8031264367816094e-05, "loss": 0.1739, "step": 11537 }, { "epoch": 39.786206896551725, "grad_norm": 0.5264396667480469, "learning_rate": 2.803080459770115e-05, "loss": 0.2009, "step": 11538 }, { "epoch": 39.789655172413795, "grad_norm": 0.5899782776832581, "learning_rate": 2.8030344827586208e-05, "loss": 0.1785, "step": 11539 }, { "epoch": 39.793103448275865, "grad_norm": 1.5404542684555054, "learning_rate": 2.8029885057471267e-05, "loss": 0.1523, "step": 11540 }, { "epoch": 39.796551724137935, "grad_norm": 0.8499155044555664, "learning_rate": 2.8029425287356322e-05, "loss": 0.1857, "step": 11541 }, { "epoch": 39.8, "grad_norm": 0.8888062238693237, "learning_rate": 2.802896551724138e-05, "loss": 0.1869, "step": 11542 }, { "epoch": 39.80344827586207, "grad_norm": 0.5029839277267456, "learning_rate": 2.8028505747126436e-05, "loss": 0.1654, "step": 11543 }, { "epoch": 39.80689655172414, "grad_norm": 0.7781130075454712, "learning_rate": 2.8028045977011494e-05, "loss": 0.1656, "step": 11544 }, { "epoch": 39.810344827586206, "grad_norm": 0.8604413866996765, "learning_rate": 2.8027586206896553e-05, "loss": 0.1457, "step": 11545 }, { "epoch": 39.813793103448276, "grad_norm": 1.0199977159500122, "learning_rate": 2.802712643678161e-05, "loss": 0.1512, "step": 11546 }, { "epoch": 39.817241379310346, "grad_norm": 0.7425589561462402, "learning_rate": 2.8026666666666667e-05, "loss": 0.1566, "step": 11547 }, { "epoch": 39.820689655172416, "grad_norm": 0.622673511505127, "learning_rate": 2.8026206896551726e-05, "loss": 0.1635, "step": 11548 }, { "epoch": 39.824137931034485, "grad_norm": 0.5950734615325928, "learning_rate": 2.802574712643678e-05, "loss": 0.1699, "step": 11549 }, { "epoch": 39.827586206896555, "grad_norm": 0.6048804521560669, "learning_rate": 2.802528735632184e-05, "loss": 0.1232, "step": 11550 }, { "epoch": 39.83103448275862, "grad_norm": 0.9689356088638306, "learning_rate": 2.8024827586206895e-05, "loss": 0.1377, "step": 11551 }, { "epoch": 39.83448275862069, "grad_norm": 0.9682617783546448, "learning_rate": 2.8024367816091957e-05, "loss": 0.1387, "step": 11552 }, { "epoch": 39.83793103448276, "grad_norm": 1.2814329862594604, "learning_rate": 2.8023908045977012e-05, "loss": 0.1251, "step": 11553 }, { "epoch": 39.84137931034483, "grad_norm": 0.9389997124671936, "learning_rate": 2.8023448275862068e-05, "loss": 0.1181, "step": 11554 }, { "epoch": 39.8448275862069, "grad_norm": 0.8210214376449585, "learning_rate": 2.8022988505747126e-05, "loss": 0.1261, "step": 11555 }, { "epoch": 39.84827586206897, "grad_norm": 0.7662495970726013, "learning_rate": 2.8022528735632185e-05, "loss": 0.1318, "step": 11556 }, { "epoch": 39.851724137931036, "grad_norm": 0.8982017636299133, "learning_rate": 2.8022068965517244e-05, "loss": 0.0962, "step": 11557 }, { "epoch": 39.855172413793106, "grad_norm": 1.0896496772766113, "learning_rate": 2.80216091954023e-05, "loss": 0.1334, "step": 11558 }, { "epoch": 39.858620689655176, "grad_norm": 0.8507948517799377, "learning_rate": 2.8021149425287354e-05, "loss": 0.1229, "step": 11559 }, { "epoch": 39.86206896551724, "grad_norm": 1.1775119304656982, "learning_rate": 2.8020689655172416e-05, "loss": 0.15, "step": 11560 }, { "epoch": 39.86551724137931, "grad_norm": 1.5011897087097168, "learning_rate": 2.8020229885057472e-05, "loss": 0.2115, "step": 11561 }, { "epoch": 39.86896551724138, "grad_norm": 0.7803739309310913, "learning_rate": 2.801977011494253e-05, "loss": 0.201, "step": 11562 }, { "epoch": 39.87241379310345, "grad_norm": 0.5539829730987549, "learning_rate": 2.8019310344827586e-05, "loss": 0.1819, "step": 11563 }, { "epoch": 39.87586206896552, "grad_norm": 0.5139386653900146, "learning_rate": 2.8018850574712644e-05, "loss": 0.1932, "step": 11564 }, { "epoch": 39.87931034482759, "grad_norm": 1.0221246480941772, "learning_rate": 2.8018390804597703e-05, "loss": 0.162, "step": 11565 }, { "epoch": 39.88275862068966, "grad_norm": 0.8142642974853516, "learning_rate": 2.801793103448276e-05, "loss": 0.1623, "step": 11566 }, { "epoch": 39.88620689655173, "grad_norm": 1.0034339427947998, "learning_rate": 2.8017471264367817e-05, "loss": 0.1818, "step": 11567 }, { "epoch": 39.889655172413796, "grad_norm": 0.7470972537994385, "learning_rate": 2.8017011494252876e-05, "loss": 0.1666, "step": 11568 }, { "epoch": 39.89310344827586, "grad_norm": 0.6089482307434082, "learning_rate": 2.801655172413793e-05, "loss": 0.1771, "step": 11569 }, { "epoch": 39.89655172413793, "grad_norm": 0.8579623699188232, "learning_rate": 2.801609195402299e-05, "loss": 0.1618, "step": 11570 }, { "epoch": 39.9, "grad_norm": 0.6752385497093201, "learning_rate": 2.8015632183908045e-05, "loss": 0.1445, "step": 11571 }, { "epoch": 39.90344827586207, "grad_norm": 0.662358283996582, "learning_rate": 2.8015172413793104e-05, "loss": 0.1567, "step": 11572 }, { "epoch": 39.90689655172414, "grad_norm": 0.7682713866233826, "learning_rate": 2.8014712643678162e-05, "loss": 0.1512, "step": 11573 }, { "epoch": 39.91034482758621, "grad_norm": 0.8414244651794434, "learning_rate": 2.8014252873563218e-05, "loss": 0.1561, "step": 11574 }, { "epoch": 39.91379310344828, "grad_norm": 0.9113763570785522, "learning_rate": 2.8013793103448276e-05, "loss": 0.1399, "step": 11575 }, { "epoch": 39.91724137931035, "grad_norm": 0.8288007378578186, "learning_rate": 2.8013333333333335e-05, "loss": 0.1484, "step": 11576 }, { "epoch": 39.92068965517242, "grad_norm": 1.0269458293914795, "learning_rate": 2.801287356321839e-05, "loss": 0.1322, "step": 11577 }, { "epoch": 39.92413793103448, "grad_norm": 0.6786368489265442, "learning_rate": 2.801241379310345e-05, "loss": 0.1202, "step": 11578 }, { "epoch": 39.92758620689655, "grad_norm": 1.5795526504516602, "learning_rate": 2.8011954022988504e-05, "loss": 0.1332, "step": 11579 }, { "epoch": 39.93103448275862, "grad_norm": 0.8821081519126892, "learning_rate": 2.8011494252873566e-05, "loss": 0.1117, "step": 11580 }, { "epoch": 39.93448275862069, "grad_norm": 0.7510960102081299, "learning_rate": 2.801103448275862e-05, "loss": 0.121, "step": 11581 }, { "epoch": 39.93793103448276, "grad_norm": 0.9274259209632874, "learning_rate": 2.8010574712643677e-05, "loss": 0.1234, "step": 11582 }, { "epoch": 39.94137931034483, "grad_norm": 0.8250483274459839, "learning_rate": 2.8010114942528736e-05, "loss": 0.114, "step": 11583 }, { "epoch": 39.9448275862069, "grad_norm": 0.865753173828125, "learning_rate": 2.8009655172413794e-05, "loss": 0.1243, "step": 11584 }, { "epoch": 39.94827586206897, "grad_norm": 1.1547582149505615, "learning_rate": 2.8009195402298853e-05, "loss": 0.1898, "step": 11585 }, { "epoch": 39.95172413793104, "grad_norm": 1.0319417715072632, "learning_rate": 2.800873563218391e-05, "loss": 0.2102, "step": 11586 }, { "epoch": 39.9551724137931, "grad_norm": 0.5368159413337708, "learning_rate": 2.8008275862068964e-05, "loss": 0.1818, "step": 11587 }, { "epoch": 39.95862068965517, "grad_norm": 0.5381379723548889, "learning_rate": 2.8007816091954026e-05, "loss": 0.1835, "step": 11588 }, { "epoch": 39.96206896551724, "grad_norm": 1.0764422416687012, "learning_rate": 2.800735632183908e-05, "loss": 0.172, "step": 11589 }, { "epoch": 39.96551724137931, "grad_norm": 0.5636780858039856, "learning_rate": 2.800689655172414e-05, "loss": 0.1434, "step": 11590 }, { "epoch": 39.96896551724138, "grad_norm": 1.5343387126922607, "learning_rate": 2.8006436781609195e-05, "loss": 0.1636, "step": 11591 }, { "epoch": 39.97241379310345, "grad_norm": 0.7999375462532043, "learning_rate": 2.8005977011494254e-05, "loss": 0.1614, "step": 11592 }, { "epoch": 39.97586206896552, "grad_norm": 1.0915523767471313, "learning_rate": 2.8005517241379312e-05, "loss": 0.1361, "step": 11593 }, { "epoch": 39.97931034482759, "grad_norm": 0.6097292304039001, "learning_rate": 2.8005057471264368e-05, "loss": 0.1432, "step": 11594 }, { "epoch": 39.98275862068966, "grad_norm": 0.9179913997650146, "learning_rate": 2.8004597701149426e-05, "loss": 0.1488, "step": 11595 }, { "epoch": 39.98620689655172, "grad_norm": 0.8879362940788269, "learning_rate": 2.8004137931034485e-05, "loss": 0.1519, "step": 11596 }, { "epoch": 39.98965517241379, "grad_norm": 0.8876436948776245, "learning_rate": 2.800367816091954e-05, "loss": 0.1216, "step": 11597 }, { "epoch": 39.99310344827586, "grad_norm": 1.6118890047073364, "learning_rate": 2.80032183908046e-05, "loss": 0.116, "step": 11598 }, { "epoch": 39.99655172413793, "grad_norm": 1.871911644935608, "learning_rate": 2.8002758620689654e-05, "loss": 0.1281, "step": 11599 }, { "epoch": 40.0, "grad_norm": 0.9541428089141846, "learning_rate": 2.8002298850574713e-05, "loss": 0.2223, "step": 11600 }, { "epoch": 40.00344827586207, "grad_norm": 0.5181897282600403, "learning_rate": 2.800183908045977e-05, "loss": 0.2095, "step": 11601 }, { "epoch": 40.00689655172414, "grad_norm": 0.5921854972839355, "learning_rate": 2.8001379310344827e-05, "loss": 0.1867, "step": 11602 }, { "epoch": 40.01034482758621, "grad_norm": 0.48063069581985474, "learning_rate": 2.8000919540229886e-05, "loss": 0.1516, "step": 11603 }, { "epoch": 40.01379310344828, "grad_norm": 0.5444591641426086, "learning_rate": 2.8000459770114944e-05, "loss": 0.1567, "step": 11604 }, { "epoch": 40.01724137931034, "grad_norm": 1.1245945692062378, "learning_rate": 2.8e-05, "loss": 0.1698, "step": 11605 }, { "epoch": 40.02068965517241, "grad_norm": 0.5311099886894226, "learning_rate": 2.7999540229885058e-05, "loss": 0.1673, "step": 11606 }, { "epoch": 40.02413793103448, "grad_norm": 0.8394598960876465, "learning_rate": 2.7999080459770114e-05, "loss": 0.1573, "step": 11607 }, { "epoch": 40.02758620689655, "grad_norm": 0.6501821279525757, "learning_rate": 2.7998620689655176e-05, "loss": 0.1354, "step": 11608 }, { "epoch": 40.03103448275862, "grad_norm": 0.5301545858383179, "learning_rate": 2.799816091954023e-05, "loss": 0.1545, "step": 11609 }, { "epoch": 40.03448275862069, "grad_norm": 0.547879695892334, "learning_rate": 2.7997701149425286e-05, "loss": 0.1451, "step": 11610 }, { "epoch": 40.03793103448276, "grad_norm": 0.5859290361404419, "learning_rate": 2.7997241379310345e-05, "loss": 0.135, "step": 11611 }, { "epoch": 40.04137931034483, "grad_norm": 0.8631305694580078, "learning_rate": 2.7996781609195404e-05, "loss": 0.1417, "step": 11612 }, { "epoch": 40.0448275862069, "grad_norm": 0.6690996885299683, "learning_rate": 2.7996321839080462e-05, "loss": 0.1219, "step": 11613 }, { "epoch": 40.04827586206896, "grad_norm": 0.5393000841140747, "learning_rate": 2.7995862068965518e-05, "loss": 0.1277, "step": 11614 }, { "epoch": 40.05172413793103, "grad_norm": 0.7986444234848022, "learning_rate": 2.7995402298850573e-05, "loss": 0.121, "step": 11615 }, { "epoch": 40.0551724137931, "grad_norm": 0.9650202989578247, "learning_rate": 2.7994942528735635e-05, "loss": 0.1264, "step": 11616 }, { "epoch": 40.05862068965517, "grad_norm": 1.3113383054733276, "learning_rate": 2.799448275862069e-05, "loss": 0.1108, "step": 11617 }, { "epoch": 40.06206896551724, "grad_norm": 1.6393826007843018, "learning_rate": 2.799402298850575e-05, "loss": 0.1163, "step": 11618 }, { "epoch": 40.06551724137931, "grad_norm": 0.690750002861023, "learning_rate": 2.7993563218390804e-05, "loss": 0.1079, "step": 11619 }, { "epoch": 40.06896551724138, "grad_norm": 0.5626112222671509, "learning_rate": 2.7993103448275863e-05, "loss": 0.1003, "step": 11620 }, { "epoch": 40.07241379310345, "grad_norm": 0.7423434257507324, "learning_rate": 2.799264367816092e-05, "loss": 0.117, "step": 11621 }, { "epoch": 40.07586206896552, "grad_norm": 0.6133210062980652, "learning_rate": 2.7992183908045977e-05, "loss": 0.0907, "step": 11622 }, { "epoch": 40.07931034482758, "grad_norm": 0.8096251487731934, "learning_rate": 2.7991724137931036e-05, "loss": 0.0993, "step": 11623 }, { "epoch": 40.08275862068965, "grad_norm": 1.315021276473999, "learning_rate": 2.7991264367816094e-05, "loss": 0.1343, "step": 11624 }, { "epoch": 40.08620689655172, "grad_norm": 1.6839079856872559, "learning_rate": 2.799080459770115e-05, "loss": 0.1622, "step": 11625 }, { "epoch": 40.08965517241379, "grad_norm": 0.5504493713378906, "learning_rate": 2.7990344827586208e-05, "loss": 0.1995, "step": 11626 }, { "epoch": 40.09310344827586, "grad_norm": 0.6462870836257935, "learning_rate": 2.7989885057471263e-05, "loss": 0.1762, "step": 11627 }, { "epoch": 40.09655172413793, "grad_norm": 0.6583945751190186, "learning_rate": 2.7989425287356326e-05, "loss": 0.1676, "step": 11628 }, { "epoch": 40.1, "grad_norm": 0.4450218975543976, "learning_rate": 2.798896551724138e-05, "loss": 0.1519, "step": 11629 }, { "epoch": 40.10344827586207, "grad_norm": 0.5877785682678223, "learning_rate": 2.7988505747126436e-05, "loss": 0.1409, "step": 11630 }, { "epoch": 40.10689655172414, "grad_norm": 0.7537900805473328, "learning_rate": 2.7988045977011495e-05, "loss": 0.1532, "step": 11631 }, { "epoch": 40.110344827586204, "grad_norm": 0.9696903228759766, "learning_rate": 2.7987586206896554e-05, "loss": 0.1627, "step": 11632 }, { "epoch": 40.11379310344827, "grad_norm": 0.7010102868080139, "learning_rate": 2.798712643678161e-05, "loss": 0.1424, "step": 11633 }, { "epoch": 40.11724137931034, "grad_norm": 0.8179371356964111, "learning_rate": 2.7986666666666668e-05, "loss": 0.1398, "step": 11634 }, { "epoch": 40.12068965517241, "grad_norm": 0.5714012980461121, "learning_rate": 2.7986206896551723e-05, "loss": 0.1534, "step": 11635 }, { "epoch": 40.12413793103448, "grad_norm": 0.6157788038253784, "learning_rate": 2.7985747126436785e-05, "loss": 0.1157, "step": 11636 }, { "epoch": 40.12758620689655, "grad_norm": 0.6693355441093445, "learning_rate": 2.798528735632184e-05, "loss": 0.1279, "step": 11637 }, { "epoch": 40.13103448275862, "grad_norm": 0.7183413505554199, "learning_rate": 2.7984827586206895e-05, "loss": 0.1077, "step": 11638 }, { "epoch": 40.13448275862069, "grad_norm": 0.5054282546043396, "learning_rate": 2.7984367816091954e-05, "loss": 0.1306, "step": 11639 }, { "epoch": 40.13793103448276, "grad_norm": 0.5936710834503174, "learning_rate": 2.7983908045977013e-05, "loss": 0.1341, "step": 11640 }, { "epoch": 40.141379310344824, "grad_norm": 0.6644858121871948, "learning_rate": 2.798344827586207e-05, "loss": 0.1316, "step": 11641 }, { "epoch": 40.144827586206894, "grad_norm": 0.8058156967163086, "learning_rate": 2.7982988505747127e-05, "loss": 0.1149, "step": 11642 }, { "epoch": 40.148275862068964, "grad_norm": 1.2202357053756714, "learning_rate": 2.7982528735632182e-05, "loss": 0.1192, "step": 11643 }, { "epoch": 40.15172413793103, "grad_norm": 0.7190808653831482, "learning_rate": 2.7982068965517244e-05, "loss": 0.1127, "step": 11644 }, { "epoch": 40.1551724137931, "grad_norm": 1.0018106698989868, "learning_rate": 2.79816091954023e-05, "loss": 0.0967, "step": 11645 }, { "epoch": 40.15862068965517, "grad_norm": 0.6785632967948914, "learning_rate": 2.7981149425287358e-05, "loss": 0.1221, "step": 11646 }, { "epoch": 40.16206896551724, "grad_norm": 0.8175433278083801, "learning_rate": 2.7980689655172413e-05, "loss": 0.1115, "step": 11647 }, { "epoch": 40.16551724137931, "grad_norm": 1.1953258514404297, "learning_rate": 2.7980229885057472e-05, "loss": 0.0901, "step": 11648 }, { "epoch": 40.16896551724138, "grad_norm": 0.7493051290512085, "learning_rate": 2.797977011494253e-05, "loss": 0.1049, "step": 11649 }, { "epoch": 40.172413793103445, "grad_norm": 0.9021804332733154, "learning_rate": 2.7979310344827586e-05, "loss": 0.1565, "step": 11650 }, { "epoch": 40.175862068965515, "grad_norm": 0.6075196266174316, "learning_rate": 2.7978850574712645e-05, "loss": 0.221, "step": 11651 }, { "epoch": 40.179310344827584, "grad_norm": 0.6591607332229614, "learning_rate": 2.7978390804597703e-05, "loss": 0.1739, "step": 11652 }, { "epoch": 40.182758620689654, "grad_norm": 0.555267333984375, "learning_rate": 2.797793103448276e-05, "loss": 0.1842, "step": 11653 }, { "epoch": 40.186206896551724, "grad_norm": 0.9013941884040833, "learning_rate": 2.7977471264367817e-05, "loss": 0.1607, "step": 11654 }, { "epoch": 40.189655172413794, "grad_norm": 0.6267364025115967, "learning_rate": 2.7977011494252873e-05, "loss": 0.1621, "step": 11655 }, { "epoch": 40.19310344827586, "grad_norm": 0.5677948594093323, "learning_rate": 2.7976551724137935e-05, "loss": 0.1452, "step": 11656 }, { "epoch": 40.19655172413793, "grad_norm": 0.502986490726471, "learning_rate": 2.797609195402299e-05, "loss": 0.1669, "step": 11657 }, { "epoch": 40.2, "grad_norm": 0.6021277904510498, "learning_rate": 2.7975632183908045e-05, "loss": 0.1313, "step": 11658 }, { "epoch": 40.203448275862065, "grad_norm": 0.6180835962295532, "learning_rate": 2.7975172413793104e-05, "loss": 0.1562, "step": 11659 }, { "epoch": 40.206896551724135, "grad_norm": 1.2324230670928955, "learning_rate": 2.7974712643678163e-05, "loss": 0.1376, "step": 11660 }, { "epoch": 40.210344827586205, "grad_norm": 0.6263248920440674, "learning_rate": 2.7974252873563218e-05, "loss": 0.1569, "step": 11661 }, { "epoch": 40.213793103448275, "grad_norm": 0.7797892093658447, "learning_rate": 2.7973793103448277e-05, "loss": 0.1315, "step": 11662 }, { "epoch": 40.217241379310344, "grad_norm": 0.5494678616523743, "learning_rate": 2.7973333333333332e-05, "loss": 0.1314, "step": 11663 }, { "epoch": 40.220689655172414, "grad_norm": 0.5808693766593933, "learning_rate": 2.7972873563218394e-05, "loss": 0.1028, "step": 11664 }, { "epoch": 40.224137931034484, "grad_norm": 0.5643265247344971, "learning_rate": 2.797241379310345e-05, "loss": 0.1202, "step": 11665 }, { "epoch": 40.227586206896554, "grad_norm": 0.7029027342796326, "learning_rate": 2.7971954022988505e-05, "loss": 0.1092, "step": 11666 }, { "epoch": 40.23103448275862, "grad_norm": 0.7036227583885193, "learning_rate": 2.7971494252873563e-05, "loss": 0.1246, "step": 11667 }, { "epoch": 40.234482758620686, "grad_norm": 0.5897785425186157, "learning_rate": 2.7971034482758622e-05, "loss": 0.1088, "step": 11668 }, { "epoch": 40.237931034482756, "grad_norm": 0.7039844393730164, "learning_rate": 2.797057471264368e-05, "loss": 0.1092, "step": 11669 }, { "epoch": 40.241379310344826, "grad_norm": 1.0365760326385498, "learning_rate": 2.7970114942528736e-05, "loss": 0.111, "step": 11670 }, { "epoch": 40.244827586206895, "grad_norm": 1.865059733390808, "learning_rate": 2.796965517241379e-05, "loss": 0.1308, "step": 11671 }, { "epoch": 40.248275862068965, "grad_norm": 0.6405381560325623, "learning_rate": 2.7969195402298853e-05, "loss": 0.0902, "step": 11672 }, { "epoch": 40.251724137931035, "grad_norm": 2.853517532348633, "learning_rate": 2.796873563218391e-05, "loss": 0.092, "step": 11673 }, { "epoch": 40.255172413793105, "grad_norm": 1.1927621364593506, "learning_rate": 2.7968275862068967e-05, "loss": 0.1035, "step": 11674 }, { "epoch": 40.258620689655174, "grad_norm": 1.0837582349777222, "learning_rate": 2.7967816091954023e-05, "loss": 0.1199, "step": 11675 }, { "epoch": 40.262068965517244, "grad_norm": 0.8633678555488586, "learning_rate": 2.796735632183908e-05, "loss": 0.227, "step": 11676 }, { "epoch": 40.265517241379314, "grad_norm": 0.7727140784263611, "learning_rate": 2.796689655172414e-05, "loss": 0.165, "step": 11677 }, { "epoch": 40.26896551724138, "grad_norm": 0.794918954372406, "learning_rate": 2.7966436781609195e-05, "loss": 0.1749, "step": 11678 }, { "epoch": 40.272413793103446, "grad_norm": 0.49571534991264343, "learning_rate": 2.7965977011494254e-05, "loss": 0.1662, "step": 11679 }, { "epoch": 40.275862068965516, "grad_norm": 0.8848534822463989, "learning_rate": 2.7965517241379313e-05, "loss": 0.148, "step": 11680 }, { "epoch": 40.279310344827586, "grad_norm": 0.9959067702293396, "learning_rate": 2.7965057471264368e-05, "loss": 0.1543, "step": 11681 }, { "epoch": 40.282758620689656, "grad_norm": 2.4409101009368896, "learning_rate": 2.7964597701149427e-05, "loss": 0.1506, "step": 11682 }, { "epoch": 40.286206896551725, "grad_norm": 0.6493564248085022, "learning_rate": 2.7964137931034482e-05, "loss": 0.1648, "step": 11683 }, { "epoch": 40.289655172413795, "grad_norm": 0.7148909568786621, "learning_rate": 2.7963678160919544e-05, "loss": 0.1441, "step": 11684 }, { "epoch": 40.293103448275865, "grad_norm": 0.6141997575759888, "learning_rate": 2.79632183908046e-05, "loss": 0.1395, "step": 11685 }, { "epoch": 40.296551724137935, "grad_norm": 0.6709399819374084, "learning_rate": 2.7962758620689655e-05, "loss": 0.15, "step": 11686 }, { "epoch": 40.3, "grad_norm": 1.1454461812973022, "learning_rate": 2.7962298850574713e-05, "loss": 0.1721, "step": 11687 }, { "epoch": 40.30344827586207, "grad_norm": 1.5469913482666016, "learning_rate": 2.7961839080459772e-05, "loss": 0.1346, "step": 11688 }, { "epoch": 40.30689655172414, "grad_norm": 0.8761661648750305, "learning_rate": 2.7961379310344827e-05, "loss": 0.1239, "step": 11689 }, { "epoch": 40.310344827586206, "grad_norm": 0.8666542172431946, "learning_rate": 2.7960919540229886e-05, "loss": 0.1427, "step": 11690 }, { "epoch": 40.313793103448276, "grad_norm": 0.6497369408607483, "learning_rate": 2.796045977011494e-05, "loss": 0.1076, "step": 11691 }, { "epoch": 40.317241379310346, "grad_norm": 0.7148862481117249, "learning_rate": 2.7960000000000003e-05, "loss": 0.1267, "step": 11692 }, { "epoch": 40.320689655172416, "grad_norm": 0.9966643452644348, "learning_rate": 2.795954022988506e-05, "loss": 0.1176, "step": 11693 }, { "epoch": 40.324137931034485, "grad_norm": 0.8974887132644653, "learning_rate": 2.7959080459770114e-05, "loss": 0.1207, "step": 11694 }, { "epoch": 40.327586206896555, "grad_norm": 0.629341185092926, "learning_rate": 2.7958620689655173e-05, "loss": 0.1185, "step": 11695 }, { "epoch": 40.33103448275862, "grad_norm": 0.8130760788917542, "learning_rate": 2.795816091954023e-05, "loss": 0.1262, "step": 11696 }, { "epoch": 40.33448275862069, "grad_norm": 0.7954714894294739, "learning_rate": 2.795770114942529e-05, "loss": 0.0981, "step": 11697 }, { "epoch": 40.33793103448276, "grad_norm": 0.5662274956703186, "learning_rate": 2.7957241379310345e-05, "loss": 0.1151, "step": 11698 }, { "epoch": 40.34137931034483, "grad_norm": 0.8570734858512878, "learning_rate": 2.79567816091954e-05, "loss": 0.124, "step": 11699 }, { "epoch": 40.3448275862069, "grad_norm": 1.1559773683547974, "learning_rate": 2.7956321839080463e-05, "loss": 0.1374, "step": 11700 }, { "epoch": 40.34827586206897, "grad_norm": 0.6159823536872864, "learning_rate": 2.7955862068965518e-05, "loss": 0.2043, "step": 11701 }, { "epoch": 40.351724137931036, "grad_norm": 0.5950391292572021, "learning_rate": 2.7955402298850577e-05, "loss": 0.1681, "step": 11702 }, { "epoch": 40.355172413793106, "grad_norm": 0.4927203059196472, "learning_rate": 2.7954942528735632e-05, "loss": 0.167, "step": 11703 }, { "epoch": 40.358620689655176, "grad_norm": 1.321279764175415, "learning_rate": 2.795448275862069e-05, "loss": 0.1613, "step": 11704 }, { "epoch": 40.36206896551724, "grad_norm": 0.8601357936859131, "learning_rate": 2.795402298850575e-05, "loss": 0.1641, "step": 11705 }, { "epoch": 40.36551724137931, "grad_norm": 1.159642219543457, "learning_rate": 2.7953563218390805e-05, "loss": 0.1678, "step": 11706 }, { "epoch": 40.36896551724138, "grad_norm": 0.8981372714042664, "learning_rate": 2.7953103448275863e-05, "loss": 0.1828, "step": 11707 }, { "epoch": 40.37241379310345, "grad_norm": 0.6486225724220276, "learning_rate": 2.7952643678160922e-05, "loss": 0.178, "step": 11708 }, { "epoch": 40.37586206896552, "grad_norm": 0.7072604298591614, "learning_rate": 2.7952183908045977e-05, "loss": 0.1514, "step": 11709 }, { "epoch": 40.37931034482759, "grad_norm": 0.5991432070732117, "learning_rate": 2.7951724137931036e-05, "loss": 0.1413, "step": 11710 }, { "epoch": 40.38275862068966, "grad_norm": 0.9521961212158203, "learning_rate": 2.795126436781609e-05, "loss": 0.1206, "step": 11711 }, { "epoch": 40.38620689655173, "grad_norm": 1.9931718111038208, "learning_rate": 2.7950804597701153e-05, "loss": 0.177, "step": 11712 }, { "epoch": 40.389655172413796, "grad_norm": 1.1548880338668823, "learning_rate": 2.795034482758621e-05, "loss": 0.1368, "step": 11713 }, { "epoch": 40.39310344827586, "grad_norm": 0.6284099817276001, "learning_rate": 2.7949885057471264e-05, "loss": 0.1328, "step": 11714 }, { "epoch": 40.39655172413793, "grad_norm": 0.8119350671768188, "learning_rate": 2.7949425287356323e-05, "loss": 0.1034, "step": 11715 }, { "epoch": 40.4, "grad_norm": 0.7650372982025146, "learning_rate": 2.794896551724138e-05, "loss": 0.1515, "step": 11716 }, { "epoch": 40.40344827586207, "grad_norm": 0.6360500454902649, "learning_rate": 2.794850574712644e-05, "loss": 0.1262, "step": 11717 }, { "epoch": 40.40689655172414, "grad_norm": 0.731512725353241, "learning_rate": 2.7948045977011495e-05, "loss": 0.1173, "step": 11718 }, { "epoch": 40.41034482758621, "grad_norm": 0.5277930498123169, "learning_rate": 2.794758620689655e-05, "loss": 0.1159, "step": 11719 }, { "epoch": 40.41379310344828, "grad_norm": 0.7175533771514893, "learning_rate": 2.7947126436781613e-05, "loss": 0.1156, "step": 11720 }, { "epoch": 40.41724137931035, "grad_norm": 0.6196317076683044, "learning_rate": 2.7946666666666668e-05, "loss": 0.1016, "step": 11721 }, { "epoch": 40.42068965517242, "grad_norm": 0.7266805768013, "learning_rate": 2.7946206896551723e-05, "loss": 0.1029, "step": 11722 }, { "epoch": 40.42413793103448, "grad_norm": 2.7246503829956055, "learning_rate": 2.7945747126436782e-05, "loss": 0.1045, "step": 11723 }, { "epoch": 40.42758620689655, "grad_norm": 0.7863669395446777, "learning_rate": 2.794528735632184e-05, "loss": 0.099, "step": 11724 }, { "epoch": 40.43103448275862, "grad_norm": 1.2347368001937866, "learning_rate": 2.79448275862069e-05, "loss": 0.1559, "step": 11725 }, { "epoch": 40.43448275862069, "grad_norm": 1.5440723896026611, "learning_rate": 2.7944367816091955e-05, "loss": 0.2245, "step": 11726 }, { "epoch": 40.43793103448276, "grad_norm": 0.5833970308303833, "learning_rate": 2.794390804597701e-05, "loss": 0.1639, "step": 11727 }, { "epoch": 40.44137931034483, "grad_norm": 1.124950885772705, "learning_rate": 2.7943448275862072e-05, "loss": 0.1765, "step": 11728 }, { "epoch": 40.4448275862069, "grad_norm": 0.7326246500015259, "learning_rate": 2.7942988505747127e-05, "loss": 0.1669, "step": 11729 }, { "epoch": 40.44827586206897, "grad_norm": 0.7290887832641602, "learning_rate": 2.7942528735632186e-05, "loss": 0.1556, "step": 11730 }, { "epoch": 40.45172413793104, "grad_norm": 1.0433979034423828, "learning_rate": 2.794206896551724e-05, "loss": 0.1513, "step": 11731 }, { "epoch": 40.4551724137931, "grad_norm": 0.5453125834465027, "learning_rate": 2.79416091954023e-05, "loss": 0.1647, "step": 11732 }, { "epoch": 40.45862068965517, "grad_norm": 0.9280747771263123, "learning_rate": 2.794114942528736e-05, "loss": 0.1586, "step": 11733 }, { "epoch": 40.46206896551724, "grad_norm": 1.8095715045928955, "learning_rate": 2.7940689655172414e-05, "loss": 0.134, "step": 11734 }, { "epoch": 40.46551724137931, "grad_norm": 0.9350835084915161, "learning_rate": 2.7940229885057473e-05, "loss": 0.1379, "step": 11735 }, { "epoch": 40.46896551724138, "grad_norm": 1.448866605758667, "learning_rate": 2.793977011494253e-05, "loss": 0.1383, "step": 11736 }, { "epoch": 40.47241379310345, "grad_norm": 0.825294017791748, "learning_rate": 2.7939310344827586e-05, "loss": 0.1382, "step": 11737 }, { "epoch": 40.47586206896552, "grad_norm": 1.2684051990509033, "learning_rate": 2.7938850574712645e-05, "loss": 0.1382, "step": 11738 }, { "epoch": 40.47931034482759, "grad_norm": 0.5286739468574524, "learning_rate": 2.79383908045977e-05, "loss": 0.1308, "step": 11739 }, { "epoch": 40.48275862068966, "grad_norm": 0.7574636340141296, "learning_rate": 2.7937931034482763e-05, "loss": 0.141, "step": 11740 }, { "epoch": 40.48620689655172, "grad_norm": 0.7143364548683167, "learning_rate": 2.7937471264367818e-05, "loss": 0.1255, "step": 11741 }, { "epoch": 40.48965517241379, "grad_norm": 0.687987208366394, "learning_rate": 2.7937011494252873e-05, "loss": 0.1132, "step": 11742 }, { "epoch": 40.49310344827586, "grad_norm": 0.7487812638282776, "learning_rate": 2.7936551724137932e-05, "loss": 0.1155, "step": 11743 }, { "epoch": 40.49655172413793, "grad_norm": 1.6839125156402588, "learning_rate": 2.793609195402299e-05, "loss": 0.11, "step": 11744 }, { "epoch": 40.5, "grad_norm": 0.7727195024490356, "learning_rate": 2.793563218390805e-05, "loss": 0.1211, "step": 11745 }, { "epoch": 40.50344827586207, "grad_norm": 0.6054051518440247, "learning_rate": 2.7935172413793104e-05, "loss": 0.0913, "step": 11746 }, { "epoch": 40.50689655172414, "grad_norm": 0.6255416870117188, "learning_rate": 2.793471264367816e-05, "loss": 0.0832, "step": 11747 }, { "epoch": 40.51034482758621, "grad_norm": 1.1705973148345947, "learning_rate": 2.793425287356322e-05, "loss": 0.112, "step": 11748 }, { "epoch": 40.51379310344828, "grad_norm": 1.0925915241241455, "learning_rate": 2.7933793103448277e-05, "loss": 0.1019, "step": 11749 }, { "epoch": 40.51724137931034, "grad_norm": 0.877522349357605, "learning_rate": 2.7933333333333332e-05, "loss": 0.1196, "step": 11750 }, { "epoch": 40.52068965517241, "grad_norm": 0.7351245880126953, "learning_rate": 2.793287356321839e-05, "loss": 0.2012, "step": 11751 }, { "epoch": 40.52413793103448, "grad_norm": 0.7945444583892822, "learning_rate": 2.7932413793103446e-05, "loss": 0.1697, "step": 11752 }, { "epoch": 40.52758620689655, "grad_norm": 0.6955210566520691, "learning_rate": 2.793195402298851e-05, "loss": 0.1793, "step": 11753 }, { "epoch": 40.53103448275862, "grad_norm": 0.8090256452560425, "learning_rate": 2.7931494252873564e-05, "loss": 0.1758, "step": 11754 }, { "epoch": 40.53448275862069, "grad_norm": 0.7219394445419312, "learning_rate": 2.793103448275862e-05, "loss": 0.18, "step": 11755 }, { "epoch": 40.53793103448276, "grad_norm": 1.8151274919509888, "learning_rate": 2.7930574712643678e-05, "loss": 0.1468, "step": 11756 }, { "epoch": 40.54137931034483, "grad_norm": 1.0861194133758545, "learning_rate": 2.7930114942528736e-05, "loss": 0.1517, "step": 11757 }, { "epoch": 40.5448275862069, "grad_norm": 0.5736615061759949, "learning_rate": 2.7929655172413795e-05, "loss": 0.1491, "step": 11758 }, { "epoch": 40.54827586206896, "grad_norm": 0.6556744575500488, "learning_rate": 2.792919540229885e-05, "loss": 0.1436, "step": 11759 }, { "epoch": 40.55172413793103, "grad_norm": 0.641986072063446, "learning_rate": 2.7928735632183906e-05, "loss": 0.137, "step": 11760 }, { "epoch": 40.5551724137931, "grad_norm": 1.0249788761138916, "learning_rate": 2.7928275862068968e-05, "loss": 0.1429, "step": 11761 }, { "epoch": 40.55862068965517, "grad_norm": 0.5966471433639526, "learning_rate": 2.7927816091954023e-05, "loss": 0.1416, "step": 11762 }, { "epoch": 40.56206896551724, "grad_norm": 0.5168145895004272, "learning_rate": 2.7927356321839082e-05, "loss": 0.1242, "step": 11763 }, { "epoch": 40.56551724137931, "grad_norm": 0.6422237157821655, "learning_rate": 2.7926896551724137e-05, "loss": 0.1205, "step": 11764 }, { "epoch": 40.56896551724138, "grad_norm": 0.8198093175888062, "learning_rate": 2.7926436781609196e-05, "loss": 0.1216, "step": 11765 }, { "epoch": 40.57241379310345, "grad_norm": 0.7240015268325806, "learning_rate": 2.7925977011494254e-05, "loss": 0.1243, "step": 11766 }, { "epoch": 40.57586206896552, "grad_norm": 0.6955036520957947, "learning_rate": 2.792551724137931e-05, "loss": 0.1144, "step": 11767 }, { "epoch": 40.57931034482758, "grad_norm": 0.9881756901741028, "learning_rate": 2.792505747126437e-05, "loss": 0.1179, "step": 11768 }, { "epoch": 40.58275862068965, "grad_norm": 0.5950742959976196, "learning_rate": 2.7924597701149427e-05, "loss": 0.1188, "step": 11769 }, { "epoch": 40.58620689655172, "grad_norm": 0.7534858584403992, "learning_rate": 2.7924137931034482e-05, "loss": 0.1104, "step": 11770 }, { "epoch": 40.58965517241379, "grad_norm": 1.7280205488204956, "learning_rate": 2.792367816091954e-05, "loss": 0.1291, "step": 11771 }, { "epoch": 40.59310344827586, "grad_norm": 2.9699957370758057, "learning_rate": 2.7923218390804596e-05, "loss": 0.1021, "step": 11772 }, { "epoch": 40.59655172413793, "grad_norm": 2.6579055786132812, "learning_rate": 2.792275862068966e-05, "loss": 0.1267, "step": 11773 }, { "epoch": 40.6, "grad_norm": 0.9990864992141724, "learning_rate": 2.7922298850574714e-05, "loss": 0.1184, "step": 11774 }, { "epoch": 40.60344827586207, "grad_norm": 1.852492094039917, "learning_rate": 2.792183908045977e-05, "loss": 0.1501, "step": 11775 }, { "epoch": 40.60689655172414, "grad_norm": 0.7869579792022705, "learning_rate": 2.7921379310344828e-05, "loss": 0.2018, "step": 11776 }, { "epoch": 40.610344827586204, "grad_norm": 1.1157070398330688, "learning_rate": 2.7920919540229886e-05, "loss": 0.1974, "step": 11777 }, { "epoch": 40.61379310344827, "grad_norm": 1.480584740638733, "learning_rate": 2.792045977011494e-05, "loss": 0.1659, "step": 11778 }, { "epoch": 40.61724137931034, "grad_norm": 1.0910156965255737, "learning_rate": 2.792e-05, "loss": 0.1818, "step": 11779 }, { "epoch": 40.62068965517241, "grad_norm": 0.8567787408828735, "learning_rate": 2.7919540229885056e-05, "loss": 0.1565, "step": 11780 }, { "epoch": 40.62413793103448, "grad_norm": 2.324859619140625, "learning_rate": 2.7919080459770118e-05, "loss": 0.1597, "step": 11781 }, { "epoch": 40.62758620689655, "grad_norm": 0.73204106092453, "learning_rate": 2.7918620689655173e-05, "loss": 0.1511, "step": 11782 }, { "epoch": 40.63103448275862, "grad_norm": 1.0159928798675537, "learning_rate": 2.7918160919540228e-05, "loss": 0.1498, "step": 11783 }, { "epoch": 40.63448275862069, "grad_norm": 0.4772260785102844, "learning_rate": 2.7917701149425287e-05, "loss": 0.1266, "step": 11784 }, { "epoch": 40.63793103448276, "grad_norm": 0.6090050339698792, "learning_rate": 2.7917241379310346e-05, "loss": 0.1433, "step": 11785 }, { "epoch": 40.641379310344824, "grad_norm": 0.9100318551063538, "learning_rate": 2.7916781609195404e-05, "loss": 0.1569, "step": 11786 }, { "epoch": 40.644827586206894, "grad_norm": 1.195310354232788, "learning_rate": 2.791632183908046e-05, "loss": 0.1397, "step": 11787 }, { "epoch": 40.648275862068964, "grad_norm": 0.9153019189834595, "learning_rate": 2.7915862068965515e-05, "loss": 0.124, "step": 11788 }, { "epoch": 40.65172413793103, "grad_norm": 0.8968881368637085, "learning_rate": 2.7915402298850577e-05, "loss": 0.1596, "step": 11789 }, { "epoch": 40.6551724137931, "grad_norm": 0.8315351605415344, "learning_rate": 2.7914942528735632e-05, "loss": 0.151, "step": 11790 }, { "epoch": 40.65862068965517, "grad_norm": 0.9347983002662659, "learning_rate": 2.791448275862069e-05, "loss": 0.1404, "step": 11791 }, { "epoch": 40.66206896551724, "grad_norm": 0.9890848398208618, "learning_rate": 2.7914022988505746e-05, "loss": 0.1286, "step": 11792 }, { "epoch": 40.66551724137931, "grad_norm": 0.6763122081756592, "learning_rate": 2.7913563218390805e-05, "loss": 0.1172, "step": 11793 }, { "epoch": 40.66896551724138, "grad_norm": 0.9448462724685669, "learning_rate": 2.7913103448275864e-05, "loss": 0.1068, "step": 11794 }, { "epoch": 40.672413793103445, "grad_norm": 1.370289921760559, "learning_rate": 2.791264367816092e-05, "loss": 0.1139, "step": 11795 }, { "epoch": 40.675862068965515, "grad_norm": 0.844764232635498, "learning_rate": 2.7912183908045978e-05, "loss": 0.115, "step": 11796 }, { "epoch": 40.679310344827584, "grad_norm": 0.6647254824638367, "learning_rate": 2.7911724137931036e-05, "loss": 0.1034, "step": 11797 }, { "epoch": 40.682758620689654, "grad_norm": 3.673616409301758, "learning_rate": 2.791126436781609e-05, "loss": 0.1151, "step": 11798 }, { "epoch": 40.686206896551724, "grad_norm": 0.7570379376411438, "learning_rate": 2.791080459770115e-05, "loss": 0.1299, "step": 11799 }, { "epoch": 40.689655172413794, "grad_norm": 1.274048089981079, "learning_rate": 2.7910344827586206e-05, "loss": 0.1861, "step": 11800 }, { "epoch": 40.69310344827586, "grad_norm": 0.7875194549560547, "learning_rate": 2.7909885057471268e-05, "loss": 0.2268, "step": 11801 }, { "epoch": 40.69655172413793, "grad_norm": 0.8849730491638184, "learning_rate": 2.7909425287356323e-05, "loss": 0.1776, "step": 11802 }, { "epoch": 40.7, "grad_norm": 1.3349769115447998, "learning_rate": 2.7908965517241378e-05, "loss": 0.184, "step": 11803 }, { "epoch": 40.703448275862065, "grad_norm": 0.6739205718040466, "learning_rate": 2.7908505747126437e-05, "loss": 0.1648, "step": 11804 }, { "epoch": 40.706896551724135, "grad_norm": 0.7420412302017212, "learning_rate": 2.7908045977011496e-05, "loss": 0.1723, "step": 11805 }, { "epoch": 40.710344827586205, "grad_norm": 0.8694465160369873, "learning_rate": 2.7907586206896554e-05, "loss": 0.1563, "step": 11806 }, { "epoch": 40.713793103448275, "grad_norm": 0.6973437070846558, "learning_rate": 2.790712643678161e-05, "loss": 0.1582, "step": 11807 }, { "epoch": 40.717241379310344, "grad_norm": 0.7746396064758301, "learning_rate": 2.7906666666666665e-05, "loss": 0.1501, "step": 11808 }, { "epoch": 40.720689655172414, "grad_norm": 1.4606819152832031, "learning_rate": 2.7906206896551727e-05, "loss": 0.1417, "step": 11809 }, { "epoch": 40.724137931034484, "grad_norm": 0.6521008610725403, "learning_rate": 2.7905747126436782e-05, "loss": 0.1438, "step": 11810 }, { "epoch": 40.727586206896554, "grad_norm": 0.7490999698638916, "learning_rate": 2.7905287356321838e-05, "loss": 0.1611, "step": 11811 }, { "epoch": 40.73103448275862, "grad_norm": 0.7601701617240906, "learning_rate": 2.7904827586206896e-05, "loss": 0.1282, "step": 11812 }, { "epoch": 40.734482758620686, "grad_norm": 0.6846688389778137, "learning_rate": 2.7904367816091955e-05, "loss": 0.1358, "step": 11813 }, { "epoch": 40.737931034482756, "grad_norm": 0.7778067588806152, "learning_rate": 2.7903908045977014e-05, "loss": 0.1286, "step": 11814 }, { "epoch": 40.741379310344826, "grad_norm": 1.0610971450805664, "learning_rate": 2.790344827586207e-05, "loss": 0.1493, "step": 11815 }, { "epoch": 40.744827586206895, "grad_norm": 1.0356992483139038, "learning_rate": 2.7902988505747124e-05, "loss": 0.1177, "step": 11816 }, { "epoch": 40.748275862068965, "grad_norm": 1.1441752910614014, "learning_rate": 2.7902528735632186e-05, "loss": 0.116, "step": 11817 }, { "epoch": 40.751724137931035, "grad_norm": 0.7899928092956543, "learning_rate": 2.790206896551724e-05, "loss": 0.1192, "step": 11818 }, { "epoch": 40.755172413793105, "grad_norm": 0.6208645105361938, "learning_rate": 2.79016091954023e-05, "loss": 0.122, "step": 11819 }, { "epoch": 40.758620689655174, "grad_norm": 1.048755168914795, "learning_rate": 2.7901149425287356e-05, "loss": 0.1028, "step": 11820 }, { "epoch": 40.762068965517244, "grad_norm": 1.3539106845855713, "learning_rate": 2.7900689655172414e-05, "loss": 0.1193, "step": 11821 }, { "epoch": 40.765517241379314, "grad_norm": 0.6610104441642761, "learning_rate": 2.7900229885057473e-05, "loss": 0.1115, "step": 11822 }, { "epoch": 40.76896551724138, "grad_norm": 0.8307573199272156, "learning_rate": 2.7899770114942528e-05, "loss": 0.108, "step": 11823 }, { "epoch": 40.772413793103446, "grad_norm": 0.88006192445755, "learning_rate": 2.7899310344827587e-05, "loss": 0.1279, "step": 11824 }, { "epoch": 40.775862068965516, "grad_norm": 1.4205797910690308, "learning_rate": 2.7898850574712646e-05, "loss": 0.1211, "step": 11825 }, { "epoch": 40.779310344827586, "grad_norm": 1.053330659866333, "learning_rate": 2.78983908045977e-05, "loss": 0.2244, "step": 11826 }, { "epoch": 40.782758620689656, "grad_norm": 0.5796409249305725, "learning_rate": 2.789793103448276e-05, "loss": 0.1967, "step": 11827 }, { "epoch": 40.786206896551725, "grad_norm": 0.5129582285881042, "learning_rate": 2.7897471264367815e-05, "loss": 0.172, "step": 11828 }, { "epoch": 40.789655172413795, "grad_norm": 0.5083187818527222, "learning_rate": 2.7897011494252877e-05, "loss": 0.1812, "step": 11829 }, { "epoch": 40.793103448275865, "grad_norm": 0.7066628336906433, "learning_rate": 2.7896551724137932e-05, "loss": 0.1873, "step": 11830 }, { "epoch": 40.796551724137935, "grad_norm": 0.5763672590255737, "learning_rate": 2.7896091954022987e-05, "loss": 0.1519, "step": 11831 }, { "epoch": 40.8, "grad_norm": 2.805004119873047, "learning_rate": 2.7895632183908046e-05, "loss": 0.1396, "step": 11832 }, { "epoch": 40.80344827586207, "grad_norm": 0.6230461001396179, "learning_rate": 2.7895172413793105e-05, "loss": 0.1459, "step": 11833 }, { "epoch": 40.80689655172414, "grad_norm": 0.4898602366447449, "learning_rate": 2.7894712643678164e-05, "loss": 0.1503, "step": 11834 }, { "epoch": 40.810344827586206, "grad_norm": 0.6101446151733398, "learning_rate": 2.789425287356322e-05, "loss": 0.1436, "step": 11835 }, { "epoch": 40.813793103448276, "grad_norm": 2.803314208984375, "learning_rate": 2.7893793103448274e-05, "loss": 0.1412, "step": 11836 }, { "epoch": 40.817241379310346, "grad_norm": 1.1435198783874512, "learning_rate": 2.7893333333333336e-05, "loss": 0.1156, "step": 11837 }, { "epoch": 40.820689655172416, "grad_norm": 0.7559360265731812, "learning_rate": 2.789287356321839e-05, "loss": 0.1354, "step": 11838 }, { "epoch": 40.824137931034485, "grad_norm": 1.0477510690689087, "learning_rate": 2.7892413793103447e-05, "loss": 0.121, "step": 11839 }, { "epoch": 40.827586206896555, "grad_norm": 0.6943573355674744, "learning_rate": 2.7891954022988505e-05, "loss": 0.1142, "step": 11840 }, { "epoch": 40.83103448275862, "grad_norm": 0.6271378993988037, "learning_rate": 2.7891494252873564e-05, "loss": 0.12, "step": 11841 }, { "epoch": 40.83448275862069, "grad_norm": 0.7589528560638428, "learning_rate": 2.7891034482758623e-05, "loss": 0.123, "step": 11842 }, { "epoch": 40.83793103448276, "grad_norm": 1.1358497142791748, "learning_rate": 2.7890574712643678e-05, "loss": 0.1257, "step": 11843 }, { "epoch": 40.84137931034483, "grad_norm": 1.1042550802230835, "learning_rate": 2.7890114942528733e-05, "loss": 0.1318, "step": 11844 }, { "epoch": 40.8448275862069, "grad_norm": 0.6960496306419373, "learning_rate": 2.7889655172413795e-05, "loss": 0.1177, "step": 11845 }, { "epoch": 40.84827586206897, "grad_norm": 0.6922822594642639, "learning_rate": 2.788919540229885e-05, "loss": 0.0955, "step": 11846 }, { "epoch": 40.851724137931036, "grad_norm": 1.0474412441253662, "learning_rate": 2.788873563218391e-05, "loss": 0.102, "step": 11847 }, { "epoch": 40.855172413793106, "grad_norm": 1.7826316356658936, "learning_rate": 2.7888275862068965e-05, "loss": 0.114, "step": 11848 }, { "epoch": 40.858620689655176, "grad_norm": 0.9580329060554504, "learning_rate": 2.7887816091954023e-05, "loss": 0.1125, "step": 11849 }, { "epoch": 40.86206896551724, "grad_norm": 0.9531774520874023, "learning_rate": 2.7887356321839082e-05, "loss": 0.1423, "step": 11850 }, { "epoch": 40.86551724137931, "grad_norm": 0.659142792224884, "learning_rate": 2.7886896551724137e-05, "loss": 0.2409, "step": 11851 }, { "epoch": 40.86896551724138, "grad_norm": 0.6826728582382202, "learning_rate": 2.7886436781609196e-05, "loss": 0.1995, "step": 11852 }, { "epoch": 40.87241379310345, "grad_norm": 0.547243058681488, "learning_rate": 2.7885977011494255e-05, "loss": 0.1736, "step": 11853 }, { "epoch": 40.87586206896552, "grad_norm": 1.1701080799102783, "learning_rate": 2.788551724137931e-05, "loss": 0.1799, "step": 11854 }, { "epoch": 40.87931034482759, "grad_norm": 1.5189658403396606, "learning_rate": 2.788505747126437e-05, "loss": 0.1613, "step": 11855 }, { "epoch": 40.88275862068966, "grad_norm": 1.4116151332855225, "learning_rate": 2.7884597701149424e-05, "loss": 0.1483, "step": 11856 }, { "epoch": 40.88620689655173, "grad_norm": 0.8265933394432068, "learning_rate": 2.7884137931034486e-05, "loss": 0.1326, "step": 11857 }, { "epoch": 40.889655172413796, "grad_norm": 2.1229608058929443, "learning_rate": 2.788367816091954e-05, "loss": 0.1705, "step": 11858 }, { "epoch": 40.89310344827586, "grad_norm": 0.9507710933685303, "learning_rate": 2.7883218390804597e-05, "loss": 0.1535, "step": 11859 }, { "epoch": 40.89655172413793, "grad_norm": 1.0392764806747437, "learning_rate": 2.7882758620689655e-05, "loss": 0.1404, "step": 11860 }, { "epoch": 40.9, "grad_norm": 0.7244572639465332, "learning_rate": 2.7882298850574714e-05, "loss": 0.1268, "step": 11861 }, { "epoch": 40.90344827586207, "grad_norm": 0.5686535239219666, "learning_rate": 2.7881839080459773e-05, "loss": 0.1426, "step": 11862 }, { "epoch": 40.90689655172414, "grad_norm": 0.5321837067604065, "learning_rate": 2.7881379310344828e-05, "loss": 0.1473, "step": 11863 }, { "epoch": 40.91034482758621, "grad_norm": 0.9425690770149231, "learning_rate": 2.7880919540229883e-05, "loss": 0.1515, "step": 11864 }, { "epoch": 40.91379310344828, "grad_norm": 2.6231701374053955, "learning_rate": 2.7880459770114945e-05, "loss": 0.1443, "step": 11865 }, { "epoch": 40.91724137931035, "grad_norm": 0.6337547302246094, "learning_rate": 2.788e-05, "loss": 0.1374, "step": 11866 }, { "epoch": 40.92068965517242, "grad_norm": 0.9334180951118469, "learning_rate": 2.7879540229885056e-05, "loss": 0.1141, "step": 11867 }, { "epoch": 40.92413793103448, "grad_norm": 0.5588310360908508, "learning_rate": 2.7879080459770115e-05, "loss": 0.1245, "step": 11868 }, { "epoch": 40.92758620689655, "grad_norm": 0.7732442617416382, "learning_rate": 2.7878620689655173e-05, "loss": 0.1248, "step": 11869 }, { "epoch": 40.93103448275862, "grad_norm": 0.9320520758628845, "learning_rate": 2.7878160919540232e-05, "loss": 0.1183, "step": 11870 }, { "epoch": 40.93448275862069, "grad_norm": 4.59244966506958, "learning_rate": 2.7877701149425287e-05, "loss": 0.1158, "step": 11871 }, { "epoch": 40.93793103448276, "grad_norm": 0.7224825024604797, "learning_rate": 2.7877241379310343e-05, "loss": 0.1088, "step": 11872 }, { "epoch": 40.94137931034483, "grad_norm": 0.9052416086196899, "learning_rate": 2.7876781609195405e-05, "loss": 0.0993, "step": 11873 }, { "epoch": 40.9448275862069, "grad_norm": 0.7622255086898804, "learning_rate": 2.787632183908046e-05, "loss": 0.1053, "step": 11874 }, { "epoch": 40.94827586206897, "grad_norm": 1.82595694065094, "learning_rate": 2.787586206896552e-05, "loss": 0.1727, "step": 11875 }, { "epoch": 40.95172413793104, "grad_norm": 0.8928139805793762, "learning_rate": 2.7875402298850574e-05, "loss": 0.1884, "step": 11876 }, { "epoch": 40.9551724137931, "grad_norm": 0.6927770972251892, "learning_rate": 2.7874942528735633e-05, "loss": 0.1922, "step": 11877 }, { "epoch": 40.95862068965517, "grad_norm": 0.9186509251594543, "learning_rate": 2.787448275862069e-05, "loss": 0.1554, "step": 11878 }, { "epoch": 40.96206896551724, "grad_norm": 1.2442209720611572, "learning_rate": 2.7874022988505747e-05, "loss": 0.1656, "step": 11879 }, { "epoch": 40.96551724137931, "grad_norm": 0.7952768206596375, "learning_rate": 2.7873563218390805e-05, "loss": 0.1712, "step": 11880 }, { "epoch": 40.96896551724138, "grad_norm": 4.886902809143066, "learning_rate": 2.7873103448275864e-05, "loss": 0.1549, "step": 11881 }, { "epoch": 40.97241379310345, "grad_norm": 0.9248790144920349, "learning_rate": 2.787264367816092e-05, "loss": 0.1605, "step": 11882 }, { "epoch": 40.97586206896552, "grad_norm": 0.5161663293838501, "learning_rate": 2.7872183908045978e-05, "loss": 0.1385, "step": 11883 }, { "epoch": 40.97931034482759, "grad_norm": 1.8625320196151733, "learning_rate": 2.7871724137931033e-05, "loss": 0.1542, "step": 11884 }, { "epoch": 40.98275862068966, "grad_norm": 0.5619131326675415, "learning_rate": 2.7871264367816095e-05, "loss": 0.1164, "step": 11885 }, { "epoch": 40.98620689655172, "grad_norm": 1.0469545125961304, "learning_rate": 2.787080459770115e-05, "loss": 0.1263, "step": 11886 }, { "epoch": 40.98965517241379, "grad_norm": 0.8741365075111389, "learning_rate": 2.7870344827586206e-05, "loss": 0.1191, "step": 11887 }, { "epoch": 40.99310344827586, "grad_norm": 0.9119535684585571, "learning_rate": 2.7869885057471265e-05, "loss": 0.1188, "step": 11888 }, { "epoch": 40.99655172413793, "grad_norm": 0.720704972743988, "learning_rate": 2.7869425287356323e-05, "loss": 0.0982, "step": 11889 }, { "epoch": 41.0, "grad_norm": 6.535160541534424, "learning_rate": 2.7868965517241382e-05, "loss": 0.1581, "step": 11890 }, { "epoch": 41.00344827586207, "grad_norm": 0.6805986166000366, "learning_rate": 2.7868505747126437e-05, "loss": 0.2226, "step": 11891 }, { "epoch": 41.00689655172414, "grad_norm": 0.572300910949707, "learning_rate": 2.7868045977011493e-05, "loss": 0.1827, "step": 11892 }, { "epoch": 41.01034482758621, "grad_norm": 0.7962373495101929, "learning_rate": 2.7867586206896555e-05, "loss": 0.1738, "step": 11893 }, { "epoch": 41.01379310344828, "grad_norm": 0.5130933523178101, "learning_rate": 2.786712643678161e-05, "loss": 0.1571, "step": 11894 }, { "epoch": 41.01724137931034, "grad_norm": 1.1399163007736206, "learning_rate": 2.7866666666666665e-05, "loss": 0.1429, "step": 11895 }, { "epoch": 41.02068965517241, "grad_norm": 0.642897367477417, "learning_rate": 2.7866206896551724e-05, "loss": 0.1567, "step": 11896 }, { "epoch": 41.02413793103448, "grad_norm": 0.725182294845581, "learning_rate": 2.7865747126436783e-05, "loss": 0.1543, "step": 11897 }, { "epoch": 41.02758620689655, "grad_norm": 0.6084275841712952, "learning_rate": 2.786528735632184e-05, "loss": 0.1556, "step": 11898 }, { "epoch": 41.03103448275862, "grad_norm": 0.4611421823501587, "learning_rate": 2.7864827586206897e-05, "loss": 0.1344, "step": 11899 }, { "epoch": 41.03448275862069, "grad_norm": 2.6376240253448486, "learning_rate": 2.7864367816091952e-05, "loss": 0.1328, "step": 11900 }, { "epoch": 41.03793103448276, "grad_norm": 0.6717047095298767, "learning_rate": 2.7863908045977014e-05, "loss": 0.1345, "step": 11901 }, { "epoch": 41.04137931034483, "grad_norm": 0.5581512451171875, "learning_rate": 2.786344827586207e-05, "loss": 0.1254, "step": 11902 }, { "epoch": 41.0448275862069, "grad_norm": 0.896269679069519, "learning_rate": 2.7862988505747128e-05, "loss": 0.1384, "step": 11903 }, { "epoch": 41.04827586206896, "grad_norm": 0.5928277969360352, "learning_rate": 2.7862528735632183e-05, "loss": 0.1307, "step": 11904 }, { "epoch": 41.05172413793103, "grad_norm": 1.040685772895813, "learning_rate": 2.7862068965517242e-05, "loss": 0.123, "step": 11905 }, { "epoch": 41.0551724137931, "grad_norm": 0.8467941284179688, "learning_rate": 2.78616091954023e-05, "loss": 0.1053, "step": 11906 }, { "epoch": 41.05862068965517, "grad_norm": 0.6045325994491577, "learning_rate": 2.7861149425287356e-05, "loss": 0.1054, "step": 11907 }, { "epoch": 41.06206896551724, "grad_norm": 1.0747803449630737, "learning_rate": 2.7860689655172415e-05, "loss": 0.1122, "step": 11908 }, { "epoch": 41.06551724137931, "grad_norm": 1.2228373289108276, "learning_rate": 2.7860229885057473e-05, "loss": 0.121, "step": 11909 }, { "epoch": 41.06896551724138, "grad_norm": 1.0814549922943115, "learning_rate": 2.785977011494253e-05, "loss": 0.0961, "step": 11910 }, { "epoch": 41.07241379310345, "grad_norm": 0.8392794728279114, "learning_rate": 2.7859310344827587e-05, "loss": 0.1144, "step": 11911 }, { "epoch": 41.07586206896552, "grad_norm": 0.5752587914466858, "learning_rate": 2.7858850574712643e-05, "loss": 0.0908, "step": 11912 }, { "epoch": 41.07931034482758, "grad_norm": 1.0401684045791626, "learning_rate": 2.7858390804597705e-05, "loss": 0.1066, "step": 11913 }, { "epoch": 41.08275862068965, "grad_norm": 1.4423011541366577, "learning_rate": 2.785793103448276e-05, "loss": 0.0978, "step": 11914 }, { "epoch": 41.08620689655172, "grad_norm": 1.0593105554580688, "learning_rate": 2.7857471264367815e-05, "loss": 0.1423, "step": 11915 }, { "epoch": 41.08965517241379, "grad_norm": 0.804160475730896, "learning_rate": 2.7857011494252874e-05, "loss": 0.2111, "step": 11916 }, { "epoch": 41.09310344827586, "grad_norm": 0.6861268281936646, "learning_rate": 2.7856551724137933e-05, "loss": 0.1742, "step": 11917 }, { "epoch": 41.09655172413793, "grad_norm": 0.6653047204017639, "learning_rate": 2.785609195402299e-05, "loss": 0.17, "step": 11918 }, { "epoch": 41.1, "grad_norm": 0.8181291222572327, "learning_rate": 2.7855632183908047e-05, "loss": 0.1613, "step": 11919 }, { "epoch": 41.10344827586207, "grad_norm": 0.7577361464500427, "learning_rate": 2.7855172413793102e-05, "loss": 0.1474, "step": 11920 }, { "epoch": 41.10689655172414, "grad_norm": 0.5354645252227783, "learning_rate": 2.7854712643678164e-05, "loss": 0.1288, "step": 11921 }, { "epoch": 41.110344827586204, "grad_norm": 0.6103320717811584, "learning_rate": 2.785425287356322e-05, "loss": 0.1479, "step": 11922 }, { "epoch": 41.11379310344827, "grad_norm": 0.7566019892692566, "learning_rate": 2.7853793103448278e-05, "loss": 0.1577, "step": 11923 }, { "epoch": 41.11724137931034, "grad_norm": 0.615723192691803, "learning_rate": 2.7853333333333333e-05, "loss": 0.1439, "step": 11924 }, { "epoch": 41.12068965517241, "grad_norm": 0.5227155685424805, "learning_rate": 2.7852873563218392e-05, "loss": 0.1342, "step": 11925 }, { "epoch": 41.12413793103448, "grad_norm": 0.954628050327301, "learning_rate": 2.785241379310345e-05, "loss": 0.1435, "step": 11926 }, { "epoch": 41.12758620689655, "grad_norm": 0.9029730558395386, "learning_rate": 2.7851954022988506e-05, "loss": 0.1175, "step": 11927 }, { "epoch": 41.13103448275862, "grad_norm": 0.6986355781555176, "learning_rate": 2.785149425287356e-05, "loss": 0.1281, "step": 11928 }, { "epoch": 41.13448275862069, "grad_norm": 0.48720887303352356, "learning_rate": 2.7851034482758623e-05, "loss": 0.1301, "step": 11929 }, { "epoch": 41.13793103448276, "grad_norm": 0.5560234785079956, "learning_rate": 2.785057471264368e-05, "loss": 0.12, "step": 11930 }, { "epoch": 41.141379310344824, "grad_norm": 0.6567030549049377, "learning_rate": 2.7850114942528737e-05, "loss": 0.111, "step": 11931 }, { "epoch": 41.144827586206894, "grad_norm": 1.221776008605957, "learning_rate": 2.7849655172413792e-05, "loss": 0.1256, "step": 11932 }, { "epoch": 41.148275862068964, "grad_norm": 1.1641077995300293, "learning_rate": 2.784919540229885e-05, "loss": 0.1234, "step": 11933 }, { "epoch": 41.15172413793103, "grad_norm": 0.5724689364433289, "learning_rate": 2.784873563218391e-05, "loss": 0.0992, "step": 11934 }, { "epoch": 41.1551724137931, "grad_norm": 1.264756679534912, "learning_rate": 2.7848275862068965e-05, "loss": 0.1176, "step": 11935 }, { "epoch": 41.15862068965517, "grad_norm": 1.532914161682129, "learning_rate": 2.7847816091954024e-05, "loss": 0.1023, "step": 11936 }, { "epoch": 41.16206896551724, "grad_norm": 2.1076996326446533, "learning_rate": 2.7847356321839083e-05, "loss": 0.098, "step": 11937 }, { "epoch": 41.16551724137931, "grad_norm": 0.7160444855690002, "learning_rate": 2.7846896551724138e-05, "loss": 0.0981, "step": 11938 }, { "epoch": 41.16896551724138, "grad_norm": 0.6355757117271423, "learning_rate": 2.7846436781609196e-05, "loss": 0.0868, "step": 11939 }, { "epoch": 41.172413793103445, "grad_norm": 1.03900146484375, "learning_rate": 2.7845977011494252e-05, "loss": 0.1194, "step": 11940 }, { "epoch": 41.175862068965515, "grad_norm": 0.5686364769935608, "learning_rate": 2.7845517241379314e-05, "loss": 0.2129, "step": 11941 }, { "epoch": 41.179310344827584, "grad_norm": 0.5388920903205872, "learning_rate": 2.784505747126437e-05, "loss": 0.1381, "step": 11942 }, { "epoch": 41.182758620689654, "grad_norm": 0.6442224979400635, "learning_rate": 2.7844597701149424e-05, "loss": 0.1695, "step": 11943 }, { "epoch": 41.186206896551724, "grad_norm": 1.1506837606430054, "learning_rate": 2.7844137931034483e-05, "loss": 0.1684, "step": 11944 }, { "epoch": 41.189655172413794, "grad_norm": 2.0077507495880127, "learning_rate": 2.7843678160919542e-05, "loss": 0.1428, "step": 11945 }, { "epoch": 41.19310344827586, "grad_norm": 0.631734311580658, "learning_rate": 2.78432183908046e-05, "loss": 0.1513, "step": 11946 }, { "epoch": 41.19655172413793, "grad_norm": 0.8695550560951233, "learning_rate": 2.7842758620689656e-05, "loss": 0.1467, "step": 11947 }, { "epoch": 41.2, "grad_norm": 0.5237994194030762, "learning_rate": 2.784229885057471e-05, "loss": 0.1535, "step": 11948 }, { "epoch": 41.203448275862065, "grad_norm": 0.706161618232727, "learning_rate": 2.7841839080459773e-05, "loss": 0.1251, "step": 11949 }, { "epoch": 41.206896551724135, "grad_norm": 0.7336849570274353, "learning_rate": 2.784137931034483e-05, "loss": 0.132, "step": 11950 }, { "epoch": 41.210344827586205, "grad_norm": 0.5422658324241638, "learning_rate": 2.7840919540229887e-05, "loss": 0.121, "step": 11951 }, { "epoch": 41.213793103448275, "grad_norm": 0.6456677913665771, "learning_rate": 2.7840459770114942e-05, "loss": 0.1297, "step": 11952 }, { "epoch": 41.217241379310344, "grad_norm": 0.7811408638954163, "learning_rate": 2.784e-05, "loss": 0.1219, "step": 11953 }, { "epoch": 41.220689655172414, "grad_norm": 0.8268111348152161, "learning_rate": 2.783954022988506e-05, "loss": 0.1165, "step": 11954 }, { "epoch": 41.224137931034484, "grad_norm": 0.7123024463653564, "learning_rate": 2.7839080459770115e-05, "loss": 0.1258, "step": 11955 }, { "epoch": 41.227586206896554, "grad_norm": 0.5659504532814026, "learning_rate": 2.783862068965517e-05, "loss": 0.1172, "step": 11956 }, { "epoch": 41.23103448275862, "grad_norm": 0.993030309677124, "learning_rate": 2.7838160919540232e-05, "loss": 0.1119, "step": 11957 }, { "epoch": 41.234482758620686, "grad_norm": 0.6471213698387146, "learning_rate": 2.7837701149425288e-05, "loss": 0.1057, "step": 11958 }, { "epoch": 41.237931034482756, "grad_norm": 0.6332545876502991, "learning_rate": 2.7837241379310346e-05, "loss": 0.1038, "step": 11959 }, { "epoch": 41.241379310344826, "grad_norm": 0.6505969762802124, "learning_rate": 2.7836781609195402e-05, "loss": 0.1046, "step": 11960 }, { "epoch": 41.244827586206895, "grad_norm": 0.7660714387893677, "learning_rate": 2.783632183908046e-05, "loss": 0.1012, "step": 11961 }, { "epoch": 41.248275862068965, "grad_norm": 0.619316041469574, "learning_rate": 2.783586206896552e-05, "loss": 0.079, "step": 11962 }, { "epoch": 41.251724137931035, "grad_norm": 0.8008268475532532, "learning_rate": 2.7835402298850574e-05, "loss": 0.1011, "step": 11963 }, { "epoch": 41.255172413793105, "grad_norm": 0.8644103407859802, "learning_rate": 2.7834942528735633e-05, "loss": 0.0878, "step": 11964 }, { "epoch": 41.258620689655174, "grad_norm": 1.2630164623260498, "learning_rate": 2.7834482758620692e-05, "loss": 0.1432, "step": 11965 }, { "epoch": 41.262068965517244, "grad_norm": 0.7756518125534058, "learning_rate": 2.7834022988505747e-05, "loss": 0.2246, "step": 11966 }, { "epoch": 41.265517241379314, "grad_norm": 0.521003246307373, "learning_rate": 2.7833563218390806e-05, "loss": 0.1681, "step": 11967 }, { "epoch": 41.26896551724138, "grad_norm": 0.6028465628623962, "learning_rate": 2.783310344827586e-05, "loss": 0.1795, "step": 11968 }, { "epoch": 41.272413793103446, "grad_norm": 0.5923756957054138, "learning_rate": 2.7832643678160923e-05, "loss": 0.1676, "step": 11969 }, { "epoch": 41.275862068965516, "grad_norm": 0.8886812925338745, "learning_rate": 2.783218390804598e-05, "loss": 0.1344, "step": 11970 }, { "epoch": 41.279310344827586, "grad_norm": 1.0262154340744019, "learning_rate": 2.7831724137931034e-05, "loss": 0.1437, "step": 11971 }, { "epoch": 41.282758620689656, "grad_norm": 1.1608253717422485, "learning_rate": 2.7831264367816092e-05, "loss": 0.1671, "step": 11972 }, { "epoch": 41.286206896551725, "grad_norm": 0.9611548185348511, "learning_rate": 2.783080459770115e-05, "loss": 0.1403, "step": 11973 }, { "epoch": 41.289655172413795, "grad_norm": 0.69005286693573, "learning_rate": 2.783034482758621e-05, "loss": 0.1386, "step": 11974 }, { "epoch": 41.293103448275865, "grad_norm": 0.6114193797111511, "learning_rate": 2.7829885057471265e-05, "loss": 0.1308, "step": 11975 }, { "epoch": 41.296551724137935, "grad_norm": 0.7856693863868713, "learning_rate": 2.782942528735632e-05, "loss": 0.1193, "step": 11976 }, { "epoch": 41.3, "grad_norm": 0.793415367603302, "learning_rate": 2.7828965517241382e-05, "loss": 0.1382, "step": 11977 }, { "epoch": 41.30344827586207, "grad_norm": 0.467740923166275, "learning_rate": 2.7828505747126438e-05, "loss": 0.111, "step": 11978 }, { "epoch": 41.30689655172414, "grad_norm": 4.855302810668945, "learning_rate": 2.7828045977011496e-05, "loss": 0.1012, "step": 11979 }, { "epoch": 41.310344827586206, "grad_norm": 0.6347041130065918, "learning_rate": 2.782758620689655e-05, "loss": 0.118, "step": 11980 }, { "epoch": 41.313793103448276, "grad_norm": 0.6863338351249695, "learning_rate": 2.782712643678161e-05, "loss": 0.1285, "step": 11981 }, { "epoch": 41.317241379310346, "grad_norm": 0.7415573000907898, "learning_rate": 2.782666666666667e-05, "loss": 0.1262, "step": 11982 }, { "epoch": 41.320689655172416, "grad_norm": 2.5014076232910156, "learning_rate": 2.7826206896551724e-05, "loss": 0.1116, "step": 11983 }, { "epoch": 41.324137931034485, "grad_norm": 0.9302916526794434, "learning_rate": 2.782574712643678e-05, "loss": 0.0943, "step": 11984 }, { "epoch": 41.327586206896555, "grad_norm": 0.982127845287323, "learning_rate": 2.7825287356321842e-05, "loss": 0.1, "step": 11985 }, { "epoch": 41.33103448275862, "grad_norm": 3.882004976272583, "learning_rate": 2.7824827586206897e-05, "loss": 0.1085, "step": 11986 }, { "epoch": 41.33448275862069, "grad_norm": 0.8281375169754028, "learning_rate": 2.7824367816091956e-05, "loss": 0.0916, "step": 11987 }, { "epoch": 41.33793103448276, "grad_norm": 0.8988734483718872, "learning_rate": 2.782390804597701e-05, "loss": 0.0929, "step": 11988 }, { "epoch": 41.34137931034483, "grad_norm": 0.8919851779937744, "learning_rate": 2.782344827586207e-05, "loss": 0.0939, "step": 11989 }, { "epoch": 41.3448275862069, "grad_norm": 1.2896007299423218, "learning_rate": 2.782298850574713e-05, "loss": 0.1532, "step": 11990 }, { "epoch": 41.34827586206897, "grad_norm": 0.6805572509765625, "learning_rate": 2.7822528735632184e-05, "loss": 0.2117, "step": 11991 }, { "epoch": 41.351724137931036, "grad_norm": 0.5688475370407104, "learning_rate": 2.7822068965517242e-05, "loss": 0.1796, "step": 11992 }, { "epoch": 41.355172413793106, "grad_norm": 1.2013331651687622, "learning_rate": 2.78216091954023e-05, "loss": 0.1571, "step": 11993 }, { "epoch": 41.358620689655176, "grad_norm": 1.2206281423568726, "learning_rate": 2.7821149425287356e-05, "loss": 0.1494, "step": 11994 }, { "epoch": 41.36206896551724, "grad_norm": 0.5915943384170532, "learning_rate": 2.7820689655172415e-05, "loss": 0.1678, "step": 11995 }, { "epoch": 41.36551724137931, "grad_norm": 0.6025797128677368, "learning_rate": 2.782022988505747e-05, "loss": 0.1413, "step": 11996 }, { "epoch": 41.36896551724138, "grad_norm": 0.7588165402412415, "learning_rate": 2.7819770114942532e-05, "loss": 0.1628, "step": 11997 }, { "epoch": 41.37241379310345, "grad_norm": 0.5640226602554321, "learning_rate": 2.7819310344827588e-05, "loss": 0.1445, "step": 11998 }, { "epoch": 41.37586206896552, "grad_norm": 1.794654369354248, "learning_rate": 2.7818850574712643e-05, "loss": 0.1355, "step": 11999 }, { "epoch": 41.37931034482759, "grad_norm": 0.6887781620025635, "learning_rate": 2.78183908045977e-05, "loss": 0.1455, "step": 12000 }, { "epoch": 41.37931034482759, "eval_cer": 0.1347972885899121, "eval_loss": 0.35223913192749023, "eval_runtime": 18.6328, "eval_samples_per_second": 49.751, "eval_steps_per_second": 0.161, "eval_wer": 0.3079710144927536, "step": 12000 }, { "epoch": 41.38275862068966, "grad_norm": 1.647302508354187, "learning_rate": 2.781793103448276e-05, "loss": 0.1274, "step": 12001 }, { "epoch": 41.38620689655173, "grad_norm": 1.355507731437683, "learning_rate": 2.781747126436782e-05, "loss": 0.1298, "step": 12002 }, { "epoch": 41.389655172413796, "grad_norm": 1.3337717056274414, "learning_rate": 2.7817011494252874e-05, "loss": 0.1332, "step": 12003 }, { "epoch": 41.39310344827586, "grad_norm": 0.5716807246208191, "learning_rate": 2.781655172413793e-05, "loss": 0.1234, "step": 12004 }, { "epoch": 41.39655172413793, "grad_norm": 1.3003660440444946, "learning_rate": 2.781609195402299e-05, "loss": 0.1276, "step": 12005 }, { "epoch": 41.4, "grad_norm": 0.663782000541687, "learning_rate": 2.7815632183908047e-05, "loss": 0.1097, "step": 12006 }, { "epoch": 41.40344827586207, "grad_norm": 0.9351308345794678, "learning_rate": 2.7815172413793106e-05, "loss": 0.106, "step": 12007 }, { "epoch": 41.40689655172414, "grad_norm": 6.276841163635254, "learning_rate": 2.781471264367816e-05, "loss": 0.1304, "step": 12008 }, { "epoch": 41.41034482758621, "grad_norm": 0.7461264133453369, "learning_rate": 2.781425287356322e-05, "loss": 0.1042, "step": 12009 }, { "epoch": 41.41379310344828, "grad_norm": 1.2917685508728027, "learning_rate": 2.7813793103448278e-05, "loss": 0.1109, "step": 12010 }, { "epoch": 41.41724137931035, "grad_norm": 0.611789882183075, "learning_rate": 2.7813333333333334e-05, "loss": 0.0961, "step": 12011 }, { "epoch": 41.42068965517242, "grad_norm": 0.795789897441864, "learning_rate": 2.7812873563218392e-05, "loss": 0.1018, "step": 12012 }, { "epoch": 41.42413793103448, "grad_norm": 0.8860388398170471, "learning_rate": 2.781241379310345e-05, "loss": 0.0928, "step": 12013 }, { "epoch": 41.42758620689655, "grad_norm": 0.7006707191467285, "learning_rate": 2.7811954022988506e-05, "loss": 0.1111, "step": 12014 }, { "epoch": 41.43103448275862, "grad_norm": 1.098008155822754, "learning_rate": 2.7811494252873565e-05, "loss": 0.1569, "step": 12015 }, { "epoch": 41.43448275862069, "grad_norm": 1.0046659708023071, "learning_rate": 2.781103448275862e-05, "loss": 0.2116, "step": 12016 }, { "epoch": 41.43793103448276, "grad_norm": 0.680325984954834, "learning_rate": 2.781057471264368e-05, "loss": 0.1681, "step": 12017 }, { "epoch": 41.44137931034483, "grad_norm": 0.5206162929534912, "learning_rate": 2.7810114942528738e-05, "loss": 0.1762, "step": 12018 }, { "epoch": 41.4448275862069, "grad_norm": 0.564152717590332, "learning_rate": 2.7809655172413793e-05, "loss": 0.163, "step": 12019 }, { "epoch": 41.44827586206897, "grad_norm": 0.5677904486656189, "learning_rate": 2.780919540229885e-05, "loss": 0.148, "step": 12020 }, { "epoch": 41.45172413793104, "grad_norm": 0.801256000995636, "learning_rate": 2.780873563218391e-05, "loss": 0.1811, "step": 12021 }, { "epoch": 41.4551724137931, "grad_norm": 0.798141360282898, "learning_rate": 2.7808275862068966e-05, "loss": 0.1406, "step": 12022 }, { "epoch": 41.45862068965517, "grad_norm": 0.5751204490661621, "learning_rate": 2.7807816091954024e-05, "loss": 0.1407, "step": 12023 }, { "epoch": 41.46206896551724, "grad_norm": 0.8733583688735962, "learning_rate": 2.780735632183908e-05, "loss": 0.1379, "step": 12024 }, { "epoch": 41.46551724137931, "grad_norm": 1.4542852640151978, "learning_rate": 2.780689655172414e-05, "loss": 0.1314, "step": 12025 }, { "epoch": 41.46896551724138, "grad_norm": 0.7062518000602722, "learning_rate": 2.7806436781609197e-05, "loss": 0.1433, "step": 12026 }, { "epoch": 41.47241379310345, "grad_norm": 0.931562066078186, "learning_rate": 2.7805977011494252e-05, "loss": 0.1206, "step": 12027 }, { "epoch": 41.47586206896552, "grad_norm": 0.4770970940589905, "learning_rate": 2.780551724137931e-05, "loss": 0.12, "step": 12028 }, { "epoch": 41.47931034482759, "grad_norm": 0.8924275040626526, "learning_rate": 2.780505747126437e-05, "loss": 0.1306, "step": 12029 }, { "epoch": 41.48275862068966, "grad_norm": 0.6384889483451843, "learning_rate": 2.7804597701149428e-05, "loss": 0.1375, "step": 12030 }, { "epoch": 41.48620689655172, "grad_norm": 0.8084830045700073, "learning_rate": 2.7804137931034484e-05, "loss": 0.1282, "step": 12031 }, { "epoch": 41.48965517241379, "grad_norm": 0.7569974660873413, "learning_rate": 2.780367816091954e-05, "loss": 0.1064, "step": 12032 }, { "epoch": 41.49310344827586, "grad_norm": 1.03480064868927, "learning_rate": 2.78032183908046e-05, "loss": 0.1208, "step": 12033 }, { "epoch": 41.49655172413793, "grad_norm": 0.7946938276290894, "learning_rate": 2.7802758620689656e-05, "loss": 0.109, "step": 12034 }, { "epoch": 41.5, "grad_norm": 0.8630624413490295, "learning_rate": 2.7802298850574715e-05, "loss": 0.1004, "step": 12035 }, { "epoch": 41.50344827586207, "grad_norm": 0.6809442043304443, "learning_rate": 2.780183908045977e-05, "loss": 0.0845, "step": 12036 }, { "epoch": 41.50689655172414, "grad_norm": 2.457381010055542, "learning_rate": 2.780137931034483e-05, "loss": 0.0904, "step": 12037 }, { "epoch": 41.51034482758621, "grad_norm": 0.6924378275871277, "learning_rate": 2.7800919540229888e-05, "loss": 0.0918, "step": 12038 }, { "epoch": 41.51379310344828, "grad_norm": 1.8367289304733276, "learning_rate": 2.7800459770114943e-05, "loss": 0.0909, "step": 12039 }, { "epoch": 41.51724137931034, "grad_norm": 0.9530537724494934, "learning_rate": 2.78e-05, "loss": 0.1287, "step": 12040 }, { "epoch": 41.52068965517241, "grad_norm": 0.9595503211021423, "learning_rate": 2.779954022988506e-05, "loss": 0.2035, "step": 12041 }, { "epoch": 41.52413793103448, "grad_norm": 0.5874448418617249, "learning_rate": 2.7799080459770115e-05, "loss": 0.1563, "step": 12042 }, { "epoch": 41.52758620689655, "grad_norm": 0.5151929259300232, "learning_rate": 2.7798620689655174e-05, "loss": 0.1635, "step": 12043 }, { "epoch": 41.53103448275862, "grad_norm": 0.7603625059127808, "learning_rate": 2.779816091954023e-05, "loss": 0.153, "step": 12044 }, { "epoch": 41.53448275862069, "grad_norm": 1.4915159940719604, "learning_rate": 2.7797701149425285e-05, "loss": 0.1514, "step": 12045 }, { "epoch": 41.53793103448276, "grad_norm": 1.5711922645568848, "learning_rate": 2.7797241379310347e-05, "loss": 0.1465, "step": 12046 }, { "epoch": 41.54137931034483, "grad_norm": 0.8018385171890259, "learning_rate": 2.7796781609195402e-05, "loss": 0.1437, "step": 12047 }, { "epoch": 41.5448275862069, "grad_norm": 0.7438400983810425, "learning_rate": 2.779632183908046e-05, "loss": 0.1565, "step": 12048 }, { "epoch": 41.54827586206896, "grad_norm": 0.9132189750671387, "learning_rate": 2.7795862068965516e-05, "loss": 0.1559, "step": 12049 }, { "epoch": 41.55172413793103, "grad_norm": 1.0592803955078125, "learning_rate": 2.7795402298850575e-05, "loss": 0.15, "step": 12050 }, { "epoch": 41.5551724137931, "grad_norm": 0.8584337830543518, "learning_rate": 2.7794942528735633e-05, "loss": 0.1317, "step": 12051 }, { "epoch": 41.55862068965517, "grad_norm": 1.3145374059677124, "learning_rate": 2.779448275862069e-05, "loss": 0.1314, "step": 12052 }, { "epoch": 41.56206896551724, "grad_norm": 0.7178171873092651, "learning_rate": 2.7794022988505747e-05, "loss": 0.1383, "step": 12053 }, { "epoch": 41.56551724137931, "grad_norm": 0.5647254586219788, "learning_rate": 2.7793563218390806e-05, "loss": 0.1128, "step": 12054 }, { "epoch": 41.56896551724138, "grad_norm": 0.5755630731582642, "learning_rate": 2.779310344827586e-05, "loss": 0.1088, "step": 12055 }, { "epoch": 41.57241379310345, "grad_norm": 1.2065749168395996, "learning_rate": 2.779264367816092e-05, "loss": 0.111, "step": 12056 }, { "epoch": 41.57586206896552, "grad_norm": 0.9171760082244873, "learning_rate": 2.7792183908045975e-05, "loss": 0.1013, "step": 12057 }, { "epoch": 41.57931034482758, "grad_norm": 1.147233247756958, "learning_rate": 2.7791724137931037e-05, "loss": 0.1015, "step": 12058 }, { "epoch": 41.58275862068965, "grad_norm": 0.8109966516494751, "learning_rate": 2.7791264367816093e-05, "loss": 0.1171, "step": 12059 }, { "epoch": 41.58620689655172, "grad_norm": 0.848900556564331, "learning_rate": 2.7790804597701148e-05, "loss": 0.1071, "step": 12060 }, { "epoch": 41.58965517241379, "grad_norm": 0.8569918870925903, "learning_rate": 2.7790344827586207e-05, "loss": 0.1109, "step": 12061 }, { "epoch": 41.59310344827586, "grad_norm": 0.8497814536094666, "learning_rate": 2.7789885057471265e-05, "loss": 0.0893, "step": 12062 }, { "epoch": 41.59655172413793, "grad_norm": 0.9317464828491211, "learning_rate": 2.7789425287356324e-05, "loss": 0.0987, "step": 12063 }, { "epoch": 41.6, "grad_norm": 1.1550813913345337, "learning_rate": 2.778896551724138e-05, "loss": 0.1192, "step": 12064 }, { "epoch": 41.60344827586207, "grad_norm": 0.8983402252197266, "learning_rate": 2.7788505747126435e-05, "loss": 0.1161, "step": 12065 }, { "epoch": 41.60689655172414, "grad_norm": 0.5893465876579285, "learning_rate": 2.7788045977011497e-05, "loss": 0.2055, "step": 12066 }, { "epoch": 41.610344827586204, "grad_norm": 0.7007556557655334, "learning_rate": 2.7787586206896552e-05, "loss": 0.1892, "step": 12067 }, { "epoch": 41.61379310344827, "grad_norm": 0.5867757201194763, "learning_rate": 2.778712643678161e-05, "loss": 0.1792, "step": 12068 }, { "epoch": 41.61724137931034, "grad_norm": 0.5072169303894043, "learning_rate": 2.7786666666666666e-05, "loss": 0.1518, "step": 12069 }, { "epoch": 41.62068965517241, "grad_norm": 0.594620943069458, "learning_rate": 2.7786206896551725e-05, "loss": 0.161, "step": 12070 }, { "epoch": 41.62413793103448, "grad_norm": 0.7362608909606934, "learning_rate": 2.7785747126436783e-05, "loss": 0.1581, "step": 12071 }, { "epoch": 41.62758620689655, "grad_norm": 0.783055305480957, "learning_rate": 2.778528735632184e-05, "loss": 0.1633, "step": 12072 }, { "epoch": 41.63103448275862, "grad_norm": 2.7484657764434814, "learning_rate": 2.7784827586206894e-05, "loss": 0.1355, "step": 12073 }, { "epoch": 41.63448275862069, "grad_norm": 0.706342875957489, "learning_rate": 2.7784367816091956e-05, "loss": 0.1252, "step": 12074 }, { "epoch": 41.63793103448276, "grad_norm": 0.9257619976997375, "learning_rate": 2.778390804597701e-05, "loss": 0.1434, "step": 12075 }, { "epoch": 41.641379310344824, "grad_norm": 0.6289350986480713, "learning_rate": 2.778344827586207e-05, "loss": 0.1359, "step": 12076 }, { "epoch": 41.644827586206894, "grad_norm": 3.493847370147705, "learning_rate": 2.7782988505747125e-05, "loss": 0.1522, "step": 12077 }, { "epoch": 41.648275862068964, "grad_norm": 0.5806840062141418, "learning_rate": 2.7782528735632184e-05, "loss": 0.1236, "step": 12078 }, { "epoch": 41.65172413793103, "grad_norm": 0.965185284614563, "learning_rate": 2.7782068965517243e-05, "loss": 0.1192, "step": 12079 }, { "epoch": 41.6551724137931, "grad_norm": 2.00398588180542, "learning_rate": 2.7781609195402298e-05, "loss": 0.1123, "step": 12080 }, { "epoch": 41.65862068965517, "grad_norm": 0.7458328008651733, "learning_rate": 2.7781149425287357e-05, "loss": 0.1269, "step": 12081 }, { "epoch": 41.66206896551724, "grad_norm": 1.304511547088623, "learning_rate": 2.7780689655172415e-05, "loss": 0.1017, "step": 12082 }, { "epoch": 41.66551724137931, "grad_norm": 1.4882891178131104, "learning_rate": 2.778022988505747e-05, "loss": 0.0984, "step": 12083 }, { "epoch": 41.66896551724138, "grad_norm": 1.0258557796478271, "learning_rate": 2.777977011494253e-05, "loss": 0.1167, "step": 12084 }, { "epoch": 41.672413793103445, "grad_norm": 1.2177654504776, "learning_rate": 2.7779310344827585e-05, "loss": 0.1127, "step": 12085 }, { "epoch": 41.675862068965515, "grad_norm": 0.9441058039665222, "learning_rate": 2.7778850574712647e-05, "loss": 0.1001, "step": 12086 }, { "epoch": 41.679310344827584, "grad_norm": 1.191937804222107, "learning_rate": 2.7778390804597702e-05, "loss": 0.0968, "step": 12087 }, { "epoch": 41.682758620689654, "grad_norm": 0.7344348430633545, "learning_rate": 2.7777931034482757e-05, "loss": 0.1069, "step": 12088 }, { "epoch": 41.686206896551724, "grad_norm": 1.2044496536254883, "learning_rate": 2.7777471264367816e-05, "loss": 0.1061, "step": 12089 }, { "epoch": 41.689655172413794, "grad_norm": 1.0031154155731201, "learning_rate": 2.7777011494252875e-05, "loss": 0.1375, "step": 12090 }, { "epoch": 41.69310344827586, "grad_norm": 1.118801474571228, "learning_rate": 2.7776551724137933e-05, "loss": 0.2245, "step": 12091 }, { "epoch": 41.69655172413793, "grad_norm": 0.5654996037483215, "learning_rate": 2.777609195402299e-05, "loss": 0.2031, "step": 12092 }, { "epoch": 41.7, "grad_norm": 1.7007839679718018, "learning_rate": 2.7775632183908044e-05, "loss": 0.1647, "step": 12093 }, { "epoch": 41.703448275862065, "grad_norm": 1.0941885709762573, "learning_rate": 2.7775172413793106e-05, "loss": 0.1572, "step": 12094 }, { "epoch": 41.706896551724135, "grad_norm": 0.760419487953186, "learning_rate": 2.777471264367816e-05, "loss": 0.1827, "step": 12095 }, { "epoch": 41.710344827586205, "grad_norm": 0.9549381136894226, "learning_rate": 2.777425287356322e-05, "loss": 0.1599, "step": 12096 }, { "epoch": 41.713793103448275, "grad_norm": 0.8041129112243652, "learning_rate": 2.7773793103448275e-05, "loss": 0.1408, "step": 12097 }, { "epoch": 41.717241379310344, "grad_norm": 0.7309832572937012, "learning_rate": 2.7773333333333334e-05, "loss": 0.1377, "step": 12098 }, { "epoch": 41.720689655172414, "grad_norm": 0.6443786025047302, "learning_rate": 2.7772873563218393e-05, "loss": 0.1515, "step": 12099 }, { "epoch": 41.724137931034484, "grad_norm": 0.5184366703033447, "learning_rate": 2.7772413793103448e-05, "loss": 0.131, "step": 12100 }, { "epoch": 41.727586206896554, "grad_norm": 0.8346721529960632, "learning_rate": 2.7771954022988507e-05, "loss": 0.1342, "step": 12101 }, { "epoch": 41.73103448275862, "grad_norm": 0.7046213746070862, "learning_rate": 2.7771494252873565e-05, "loss": 0.1442, "step": 12102 }, { "epoch": 41.734482758620686, "grad_norm": 0.77152019739151, "learning_rate": 2.777103448275862e-05, "loss": 0.1274, "step": 12103 }, { "epoch": 41.737931034482756, "grad_norm": 0.5682352781295776, "learning_rate": 2.777057471264368e-05, "loss": 0.1206, "step": 12104 }, { "epoch": 41.741379310344826, "grad_norm": 0.6662681102752686, "learning_rate": 2.7770114942528735e-05, "loss": 0.1222, "step": 12105 }, { "epoch": 41.744827586206895, "grad_norm": 1.8693732023239136, "learning_rate": 2.7769655172413793e-05, "loss": 0.1246, "step": 12106 }, { "epoch": 41.748275862068965, "grad_norm": 1.1801698207855225, "learning_rate": 2.7769195402298852e-05, "loss": 0.1228, "step": 12107 }, { "epoch": 41.751724137931035, "grad_norm": 0.9685055613517761, "learning_rate": 2.7768735632183907e-05, "loss": 0.1199, "step": 12108 }, { "epoch": 41.755172413793105, "grad_norm": 1.0775163173675537, "learning_rate": 2.7768275862068966e-05, "loss": 0.1056, "step": 12109 }, { "epoch": 41.758620689655174, "grad_norm": 1.0267280340194702, "learning_rate": 2.7767816091954025e-05, "loss": 0.1034, "step": 12110 }, { "epoch": 41.762068965517244, "grad_norm": 0.6858788132667542, "learning_rate": 2.776735632183908e-05, "loss": 0.113, "step": 12111 }, { "epoch": 41.765517241379314, "grad_norm": 0.7057468891143799, "learning_rate": 2.776689655172414e-05, "loss": 0.0908, "step": 12112 }, { "epoch": 41.76896551724138, "grad_norm": 0.7156124711036682, "learning_rate": 2.7766436781609194e-05, "loss": 0.1003, "step": 12113 }, { "epoch": 41.772413793103446, "grad_norm": 0.7637952566146851, "learning_rate": 2.7765977011494256e-05, "loss": 0.1178, "step": 12114 }, { "epoch": 41.775862068965516, "grad_norm": 0.8371727466583252, "learning_rate": 2.776551724137931e-05, "loss": 0.1289, "step": 12115 }, { "epoch": 41.779310344827586, "grad_norm": 0.5936165452003479, "learning_rate": 2.7765057471264367e-05, "loss": 0.2142, "step": 12116 }, { "epoch": 41.782758620689656, "grad_norm": 0.6469882130622864, "learning_rate": 2.7764597701149425e-05, "loss": 0.1884, "step": 12117 }, { "epoch": 41.786206896551725, "grad_norm": 0.6366070508956909, "learning_rate": 2.7764137931034484e-05, "loss": 0.1647, "step": 12118 }, { "epoch": 41.789655172413795, "grad_norm": 0.5981214642524719, "learning_rate": 2.7763678160919543e-05, "loss": 0.1655, "step": 12119 }, { "epoch": 41.793103448275865, "grad_norm": 0.5866469740867615, "learning_rate": 2.7763218390804598e-05, "loss": 0.1567, "step": 12120 }, { "epoch": 41.796551724137935, "grad_norm": 0.48371851444244385, "learning_rate": 2.7762758620689653e-05, "loss": 0.1404, "step": 12121 }, { "epoch": 41.8, "grad_norm": 0.5218006372451782, "learning_rate": 2.7762298850574715e-05, "loss": 0.1493, "step": 12122 }, { "epoch": 41.80344827586207, "grad_norm": 0.69767165184021, "learning_rate": 2.776183908045977e-05, "loss": 0.1579, "step": 12123 }, { "epoch": 41.80689655172414, "grad_norm": 0.9146779179573059, "learning_rate": 2.776137931034483e-05, "loss": 0.1372, "step": 12124 }, { "epoch": 41.810344827586206, "grad_norm": 0.6239284873008728, "learning_rate": 2.7760919540229885e-05, "loss": 0.1234, "step": 12125 }, { "epoch": 41.813793103448276, "grad_norm": 0.5803810358047485, "learning_rate": 2.7760459770114943e-05, "loss": 0.1538, "step": 12126 }, { "epoch": 41.817241379310346, "grad_norm": 1.8442617654800415, "learning_rate": 2.7760000000000002e-05, "loss": 0.1344, "step": 12127 }, { "epoch": 41.820689655172416, "grad_norm": 0.586837887763977, "learning_rate": 2.7759540229885057e-05, "loss": 0.1422, "step": 12128 }, { "epoch": 41.824137931034485, "grad_norm": 0.6534242033958435, "learning_rate": 2.7759080459770116e-05, "loss": 0.1348, "step": 12129 }, { "epoch": 41.827586206896555, "grad_norm": 0.5508120656013489, "learning_rate": 2.7758620689655175e-05, "loss": 0.1334, "step": 12130 }, { "epoch": 41.83103448275862, "grad_norm": 0.596862256526947, "learning_rate": 2.775816091954023e-05, "loss": 0.1193, "step": 12131 }, { "epoch": 41.83448275862069, "grad_norm": 0.8953147530555725, "learning_rate": 2.775770114942529e-05, "loss": 0.1209, "step": 12132 }, { "epoch": 41.83793103448276, "grad_norm": 0.8693134784698486, "learning_rate": 2.7757241379310344e-05, "loss": 0.1111, "step": 12133 }, { "epoch": 41.84137931034483, "grad_norm": 1.7499192953109741, "learning_rate": 2.7756781609195402e-05, "loss": 0.0928, "step": 12134 }, { "epoch": 41.8448275862069, "grad_norm": 0.5858666896820068, "learning_rate": 2.775632183908046e-05, "loss": 0.093, "step": 12135 }, { "epoch": 41.84827586206897, "grad_norm": 7.566533088684082, "learning_rate": 2.7755862068965516e-05, "loss": 0.1012, "step": 12136 }, { "epoch": 41.851724137931036, "grad_norm": 1.0486544370651245, "learning_rate": 2.7755402298850575e-05, "loss": 0.0952, "step": 12137 }, { "epoch": 41.855172413793106, "grad_norm": 7.710630893707275, "learning_rate": 2.7754942528735634e-05, "loss": 0.0916, "step": 12138 }, { "epoch": 41.858620689655176, "grad_norm": 0.6773425936698914, "learning_rate": 2.775448275862069e-05, "loss": 0.0771, "step": 12139 }, { "epoch": 41.86206896551724, "grad_norm": 1.3016501665115356, "learning_rate": 2.7754022988505748e-05, "loss": 0.1461, "step": 12140 }, { "epoch": 41.86551724137931, "grad_norm": 0.8507784605026245, "learning_rate": 2.7753563218390803e-05, "loss": 0.2039, "step": 12141 }, { "epoch": 41.86896551724138, "grad_norm": 2.2648680210113525, "learning_rate": 2.7753103448275865e-05, "loss": 0.1744, "step": 12142 }, { "epoch": 41.87241379310345, "grad_norm": 0.612014651298523, "learning_rate": 2.775264367816092e-05, "loss": 0.1614, "step": 12143 }, { "epoch": 41.87586206896552, "grad_norm": 0.6273930668830872, "learning_rate": 2.7752183908045976e-05, "loss": 0.1529, "step": 12144 }, { "epoch": 41.87931034482759, "grad_norm": 0.7452580332756042, "learning_rate": 2.7751724137931034e-05, "loss": 0.1767, "step": 12145 }, { "epoch": 41.88275862068966, "grad_norm": 1.0542641878128052, "learning_rate": 2.7751264367816093e-05, "loss": 0.1477, "step": 12146 }, { "epoch": 41.88620689655173, "grad_norm": 0.7475886344909668, "learning_rate": 2.7750804597701152e-05, "loss": 0.1562, "step": 12147 }, { "epoch": 41.889655172413796, "grad_norm": 0.7821846604347229, "learning_rate": 2.7750344827586207e-05, "loss": 0.1288, "step": 12148 }, { "epoch": 41.89310344827586, "grad_norm": 0.5586880445480347, "learning_rate": 2.7749885057471262e-05, "loss": 0.1227, "step": 12149 }, { "epoch": 41.89655172413793, "grad_norm": 0.520210862159729, "learning_rate": 2.7749425287356324e-05, "loss": 0.12, "step": 12150 }, { "epoch": 41.9, "grad_norm": 0.7665215134620667, "learning_rate": 2.774896551724138e-05, "loss": 0.1078, "step": 12151 }, { "epoch": 41.90344827586207, "grad_norm": 0.6393218040466309, "learning_rate": 2.774850574712644e-05, "loss": 0.1275, "step": 12152 }, { "epoch": 41.90689655172414, "grad_norm": 0.6768131256103516, "learning_rate": 2.7748045977011494e-05, "loss": 0.1193, "step": 12153 }, { "epoch": 41.91034482758621, "grad_norm": 0.9701106548309326, "learning_rate": 2.7747586206896552e-05, "loss": 0.1241, "step": 12154 }, { "epoch": 41.91379310344828, "grad_norm": 0.8718293905258179, "learning_rate": 2.774712643678161e-05, "loss": 0.1251, "step": 12155 }, { "epoch": 41.91724137931035, "grad_norm": 0.6790260672569275, "learning_rate": 2.7746666666666666e-05, "loss": 0.1192, "step": 12156 }, { "epoch": 41.92068965517242, "grad_norm": 0.5131826996803284, "learning_rate": 2.7746206896551725e-05, "loss": 0.0939, "step": 12157 }, { "epoch": 41.92413793103448, "grad_norm": 0.5489954948425293, "learning_rate": 2.7745747126436784e-05, "loss": 0.1257, "step": 12158 }, { "epoch": 41.92758620689655, "grad_norm": 0.6982395648956299, "learning_rate": 2.774528735632184e-05, "loss": 0.1048, "step": 12159 }, { "epoch": 41.93103448275862, "grad_norm": 0.637733519077301, "learning_rate": 2.7744827586206898e-05, "loss": 0.0943, "step": 12160 }, { "epoch": 41.93448275862069, "grad_norm": 0.6394059658050537, "learning_rate": 2.7744367816091953e-05, "loss": 0.1494, "step": 12161 }, { "epoch": 41.93793103448276, "grad_norm": 0.7366625070571899, "learning_rate": 2.7743908045977012e-05, "loss": 0.0963, "step": 12162 }, { "epoch": 41.94137931034483, "grad_norm": 0.7894964218139648, "learning_rate": 2.774344827586207e-05, "loss": 0.0776, "step": 12163 }, { "epoch": 41.9448275862069, "grad_norm": 1.0178426504135132, "learning_rate": 2.7742988505747126e-05, "loss": 0.1123, "step": 12164 }, { "epoch": 41.94827586206897, "grad_norm": 0.9952828288078308, "learning_rate": 2.7742528735632184e-05, "loss": 0.1504, "step": 12165 }, { "epoch": 41.95172413793104, "grad_norm": 0.6074187159538269, "learning_rate": 2.7742068965517243e-05, "loss": 0.2026, "step": 12166 }, { "epoch": 41.9551724137931, "grad_norm": 1.011322021484375, "learning_rate": 2.77416091954023e-05, "loss": 0.1638, "step": 12167 }, { "epoch": 41.95862068965517, "grad_norm": 0.5210318565368652, "learning_rate": 2.7741149425287357e-05, "loss": 0.1464, "step": 12168 }, { "epoch": 41.96206896551724, "grad_norm": 0.7014193534851074, "learning_rate": 2.7740689655172412e-05, "loss": 0.1625, "step": 12169 }, { "epoch": 41.96551724137931, "grad_norm": 0.8528085350990295, "learning_rate": 2.7740229885057474e-05, "loss": 0.1539, "step": 12170 }, { "epoch": 41.96896551724138, "grad_norm": 1.0108246803283691, "learning_rate": 2.773977011494253e-05, "loss": 0.1317, "step": 12171 }, { "epoch": 41.97241379310345, "grad_norm": 0.49449095129966736, "learning_rate": 2.7739310344827585e-05, "loss": 0.1337, "step": 12172 }, { "epoch": 41.97586206896552, "grad_norm": 1.39926016330719, "learning_rate": 2.7738850574712644e-05, "loss": 0.1293, "step": 12173 }, { "epoch": 41.97931034482759, "grad_norm": 0.8530870079994202, "learning_rate": 2.7738390804597702e-05, "loss": 0.125, "step": 12174 }, { "epoch": 41.98275862068966, "grad_norm": 0.6534234285354614, "learning_rate": 2.773793103448276e-05, "loss": 0.111, "step": 12175 }, { "epoch": 41.98620689655172, "grad_norm": 0.6743464469909668, "learning_rate": 2.7737471264367816e-05, "loss": 0.1043, "step": 12176 }, { "epoch": 41.98965517241379, "grad_norm": 0.5683451890945435, "learning_rate": 2.773701149425287e-05, "loss": 0.0835, "step": 12177 }, { "epoch": 41.99310344827586, "grad_norm": 0.8025150895118713, "learning_rate": 2.7736551724137934e-05, "loss": 0.1117, "step": 12178 }, { "epoch": 41.99655172413793, "grad_norm": 0.8934260010719299, "learning_rate": 2.773609195402299e-05, "loss": 0.1142, "step": 12179 }, { "epoch": 42.0, "grad_norm": 0.9484627842903137, "learning_rate": 2.7735632183908048e-05, "loss": 0.1453, "step": 12180 }, { "epoch": 42.00344827586207, "grad_norm": 0.903594434261322, "learning_rate": 2.7735172413793103e-05, "loss": 0.2045, "step": 12181 }, { "epoch": 42.00689655172414, "grad_norm": 0.5158071517944336, "learning_rate": 2.773471264367816e-05, "loss": 0.1575, "step": 12182 }, { "epoch": 42.01034482758621, "grad_norm": 0.7775729298591614, "learning_rate": 2.773425287356322e-05, "loss": 0.1528, "step": 12183 }, { "epoch": 42.01379310344828, "grad_norm": 1.3706367015838623, "learning_rate": 2.7733793103448276e-05, "loss": 0.1406, "step": 12184 }, { "epoch": 42.01724137931034, "grad_norm": 0.5104444026947021, "learning_rate": 2.7733333333333334e-05, "loss": 0.143, "step": 12185 }, { "epoch": 42.02068965517241, "grad_norm": 0.4715483486652374, "learning_rate": 2.7732873563218393e-05, "loss": 0.1365, "step": 12186 }, { "epoch": 42.02413793103448, "grad_norm": 0.49133068323135376, "learning_rate": 2.773241379310345e-05, "loss": 0.1162, "step": 12187 }, { "epoch": 42.02758620689655, "grad_norm": 0.8650007247924805, "learning_rate": 2.7731954022988507e-05, "loss": 0.1414, "step": 12188 }, { "epoch": 42.03103448275862, "grad_norm": 1.1109046936035156, "learning_rate": 2.7731494252873562e-05, "loss": 0.1259, "step": 12189 }, { "epoch": 42.03448275862069, "grad_norm": 0.9704614281654358, "learning_rate": 2.773103448275862e-05, "loss": 0.1168, "step": 12190 }, { "epoch": 42.03793103448276, "grad_norm": 0.9092183709144592, "learning_rate": 2.773057471264368e-05, "loss": 0.1322, "step": 12191 }, { "epoch": 42.04137931034483, "grad_norm": 0.6951188445091248, "learning_rate": 2.7730114942528735e-05, "loss": 0.1363, "step": 12192 }, { "epoch": 42.0448275862069, "grad_norm": 0.6823880076408386, "learning_rate": 2.7729655172413794e-05, "loss": 0.1006, "step": 12193 }, { "epoch": 42.04827586206896, "grad_norm": 0.5344151854515076, "learning_rate": 2.7729195402298852e-05, "loss": 0.1137, "step": 12194 }, { "epoch": 42.05172413793103, "grad_norm": 1.333155870437622, "learning_rate": 2.7728735632183908e-05, "loss": 0.1051, "step": 12195 }, { "epoch": 42.0551724137931, "grad_norm": 0.5995664000511169, "learning_rate": 2.7728275862068966e-05, "loss": 0.1199, "step": 12196 }, { "epoch": 42.05862068965517, "grad_norm": 0.7269514203071594, "learning_rate": 2.772781609195402e-05, "loss": 0.1084, "step": 12197 }, { "epoch": 42.06206896551724, "grad_norm": 1.1524360179901123, "learning_rate": 2.7727356321839084e-05, "loss": 0.0873, "step": 12198 }, { "epoch": 42.06551724137931, "grad_norm": 0.7855322360992432, "learning_rate": 2.772689655172414e-05, "loss": 0.0792, "step": 12199 }, { "epoch": 42.06896551724138, "grad_norm": 1.3210208415985107, "learning_rate": 2.7726436781609194e-05, "loss": 0.0838, "step": 12200 }, { "epoch": 42.07241379310345, "grad_norm": 0.8058980107307434, "learning_rate": 2.7725977011494253e-05, "loss": 0.1021, "step": 12201 }, { "epoch": 42.07586206896552, "grad_norm": 0.6543471217155457, "learning_rate": 2.772551724137931e-05, "loss": 0.085, "step": 12202 }, { "epoch": 42.07931034482758, "grad_norm": 1.2874486446380615, "learning_rate": 2.772505747126437e-05, "loss": 0.0761, "step": 12203 }, { "epoch": 42.08275862068965, "grad_norm": 0.746643602848053, "learning_rate": 2.7724597701149426e-05, "loss": 0.0964, "step": 12204 }, { "epoch": 42.08620689655172, "grad_norm": 1.290374517440796, "learning_rate": 2.772413793103448e-05, "loss": 0.1337, "step": 12205 }, { "epoch": 42.08965517241379, "grad_norm": 0.810954213142395, "learning_rate": 2.7723678160919543e-05, "loss": 0.2259, "step": 12206 }, { "epoch": 42.09310344827586, "grad_norm": 0.5332759618759155, "learning_rate": 2.7723218390804598e-05, "loss": 0.1775, "step": 12207 }, { "epoch": 42.09655172413793, "grad_norm": 0.6969045400619507, "learning_rate": 2.7722758620689657e-05, "loss": 0.1535, "step": 12208 }, { "epoch": 42.1, "grad_norm": 0.4420578181743622, "learning_rate": 2.7722298850574712e-05, "loss": 0.1435, "step": 12209 }, { "epoch": 42.10344827586207, "grad_norm": 0.7969768643379211, "learning_rate": 2.772183908045977e-05, "loss": 0.1391, "step": 12210 }, { "epoch": 42.10689655172414, "grad_norm": 0.5897282361984253, "learning_rate": 2.772137931034483e-05, "loss": 0.1403, "step": 12211 }, { "epoch": 42.110344827586204, "grad_norm": 0.7075042128562927, "learning_rate": 2.7720919540229885e-05, "loss": 0.1412, "step": 12212 }, { "epoch": 42.11379310344827, "grad_norm": 0.5761647820472717, "learning_rate": 2.7720459770114944e-05, "loss": 0.1474, "step": 12213 }, { "epoch": 42.11724137931034, "grad_norm": 0.6489739418029785, "learning_rate": 2.7720000000000002e-05, "loss": 0.1171, "step": 12214 }, { "epoch": 42.12068965517241, "grad_norm": 1.3134468793869019, "learning_rate": 2.7719540229885058e-05, "loss": 0.1222, "step": 12215 }, { "epoch": 42.12413793103448, "grad_norm": 1.2588915824890137, "learning_rate": 2.7719080459770116e-05, "loss": 0.1212, "step": 12216 }, { "epoch": 42.12758620689655, "grad_norm": 0.7198085188865662, "learning_rate": 2.771862068965517e-05, "loss": 0.1135, "step": 12217 }, { "epoch": 42.13103448275862, "grad_norm": 0.9662955403327942, "learning_rate": 2.771816091954023e-05, "loss": 0.1197, "step": 12218 }, { "epoch": 42.13448275862069, "grad_norm": 1.2284237146377563, "learning_rate": 2.771770114942529e-05, "loss": 0.1213, "step": 12219 }, { "epoch": 42.13793103448276, "grad_norm": 0.5901982188224792, "learning_rate": 2.7717241379310344e-05, "loss": 0.1367, "step": 12220 }, { "epoch": 42.141379310344824, "grad_norm": 1.642527461051941, "learning_rate": 2.7716781609195403e-05, "loss": 0.0888, "step": 12221 }, { "epoch": 42.144827586206894, "grad_norm": 0.6087860465049744, "learning_rate": 2.771632183908046e-05, "loss": 0.1093, "step": 12222 }, { "epoch": 42.148275862068964, "grad_norm": 0.6174551248550415, "learning_rate": 2.7715862068965517e-05, "loss": 0.0805, "step": 12223 }, { "epoch": 42.15172413793103, "grad_norm": 1.086341142654419, "learning_rate": 2.7715402298850576e-05, "loss": 0.0953, "step": 12224 }, { "epoch": 42.1551724137931, "grad_norm": 0.5938546061515808, "learning_rate": 2.771494252873563e-05, "loss": 0.1082, "step": 12225 }, { "epoch": 42.15862068965517, "grad_norm": 0.9536235332489014, "learning_rate": 2.7714482758620693e-05, "loss": 0.116, "step": 12226 }, { "epoch": 42.16206896551724, "grad_norm": 0.5252532362937927, "learning_rate": 2.7714022988505748e-05, "loss": 0.0713, "step": 12227 }, { "epoch": 42.16551724137931, "grad_norm": 0.7668975591659546, "learning_rate": 2.7713563218390803e-05, "loss": 0.0884, "step": 12228 }, { "epoch": 42.16896551724138, "grad_norm": 1.4742944240570068, "learning_rate": 2.7713103448275862e-05, "loss": 0.0935, "step": 12229 }, { "epoch": 42.172413793103445, "grad_norm": 1.116974949836731, "learning_rate": 2.771264367816092e-05, "loss": 0.0997, "step": 12230 }, { "epoch": 42.175862068965515, "grad_norm": 0.7124062776565552, "learning_rate": 2.771218390804598e-05, "loss": 0.2135, "step": 12231 }, { "epoch": 42.179310344827584, "grad_norm": 0.8280739188194275, "learning_rate": 2.7711724137931035e-05, "loss": 0.1608, "step": 12232 }, { "epoch": 42.182758620689654, "grad_norm": 0.4658404588699341, "learning_rate": 2.771126436781609e-05, "loss": 0.1393, "step": 12233 }, { "epoch": 42.186206896551724, "grad_norm": 0.6680828332901001, "learning_rate": 2.7710804597701152e-05, "loss": 0.1367, "step": 12234 }, { "epoch": 42.189655172413794, "grad_norm": 0.5724013447761536, "learning_rate": 2.7710344827586207e-05, "loss": 0.1447, "step": 12235 }, { "epoch": 42.19310344827586, "grad_norm": 0.8500628471374512, "learning_rate": 2.7709885057471266e-05, "loss": 0.1452, "step": 12236 }, { "epoch": 42.19655172413793, "grad_norm": 0.4868459701538086, "learning_rate": 2.770942528735632e-05, "loss": 0.1447, "step": 12237 }, { "epoch": 42.2, "grad_norm": 1.0619430541992188, "learning_rate": 2.770896551724138e-05, "loss": 0.1399, "step": 12238 }, { "epoch": 42.203448275862065, "grad_norm": 0.6006832718849182, "learning_rate": 2.770850574712644e-05, "loss": 0.1307, "step": 12239 }, { "epoch": 42.206896551724135, "grad_norm": 0.7939869165420532, "learning_rate": 2.7708045977011494e-05, "loss": 0.1321, "step": 12240 }, { "epoch": 42.210344827586205, "grad_norm": 0.6474122405052185, "learning_rate": 2.7707586206896553e-05, "loss": 0.0972, "step": 12241 }, { "epoch": 42.213793103448275, "grad_norm": 0.734545886516571, "learning_rate": 2.770712643678161e-05, "loss": 0.1267, "step": 12242 }, { "epoch": 42.217241379310344, "grad_norm": 0.8388192057609558, "learning_rate": 2.7706666666666667e-05, "loss": 0.1075, "step": 12243 }, { "epoch": 42.220689655172414, "grad_norm": 0.6011995673179626, "learning_rate": 2.7706206896551725e-05, "loss": 0.125, "step": 12244 }, { "epoch": 42.224137931034484, "grad_norm": 0.625085711479187, "learning_rate": 2.770574712643678e-05, "loss": 0.1159, "step": 12245 }, { "epoch": 42.227586206896554, "grad_norm": 1.7637776136398315, "learning_rate": 2.7705287356321843e-05, "loss": 0.0939, "step": 12246 }, { "epoch": 42.23103448275862, "grad_norm": 0.8120837807655334, "learning_rate": 2.7704827586206898e-05, "loss": 0.1007, "step": 12247 }, { "epoch": 42.234482758620686, "grad_norm": 0.5814431309700012, "learning_rate": 2.7704367816091953e-05, "loss": 0.1123, "step": 12248 }, { "epoch": 42.237931034482756, "grad_norm": 0.6666516065597534, "learning_rate": 2.7703908045977012e-05, "loss": 0.0934, "step": 12249 }, { "epoch": 42.241379310344826, "grad_norm": 1.5320792198181152, "learning_rate": 2.770344827586207e-05, "loss": 0.0858, "step": 12250 }, { "epoch": 42.244827586206895, "grad_norm": 0.6509525179862976, "learning_rate": 2.7702988505747126e-05, "loss": 0.0997, "step": 12251 }, { "epoch": 42.248275862068965, "grad_norm": 1.182822346687317, "learning_rate": 2.7702528735632185e-05, "loss": 0.0995, "step": 12252 }, { "epoch": 42.251724137931035, "grad_norm": 0.5089275240898132, "learning_rate": 2.770206896551724e-05, "loss": 0.0726, "step": 12253 }, { "epoch": 42.255172413793105, "grad_norm": 0.8418914079666138, "learning_rate": 2.7701609195402302e-05, "loss": 0.0729, "step": 12254 }, { "epoch": 42.258620689655174, "grad_norm": 1.28119695186615, "learning_rate": 2.7701149425287357e-05, "loss": 0.1561, "step": 12255 }, { "epoch": 42.262068965517244, "grad_norm": 0.6547238826751709, "learning_rate": 2.7700689655172413e-05, "loss": 0.2195, "step": 12256 }, { "epoch": 42.265517241379314, "grad_norm": 0.9701516032218933, "learning_rate": 2.770022988505747e-05, "loss": 0.1677, "step": 12257 }, { "epoch": 42.26896551724138, "grad_norm": 0.5671674609184265, "learning_rate": 2.769977011494253e-05, "loss": 0.1805, "step": 12258 }, { "epoch": 42.272413793103446, "grad_norm": 0.5383252501487732, "learning_rate": 2.769931034482759e-05, "loss": 0.1682, "step": 12259 }, { "epoch": 42.275862068965516, "grad_norm": 1.5424975156784058, "learning_rate": 2.7698850574712644e-05, "loss": 0.1322, "step": 12260 }, { "epoch": 42.279310344827586, "grad_norm": 0.4917159974575043, "learning_rate": 2.76983908045977e-05, "loss": 0.1353, "step": 12261 }, { "epoch": 42.282758620689656, "grad_norm": 1.4433965682983398, "learning_rate": 2.769793103448276e-05, "loss": 0.1342, "step": 12262 }, { "epoch": 42.286206896551725, "grad_norm": 0.5652056336402893, "learning_rate": 2.7697471264367817e-05, "loss": 0.1449, "step": 12263 }, { "epoch": 42.289655172413795, "grad_norm": 0.5982080698013306, "learning_rate": 2.7697011494252875e-05, "loss": 0.1567, "step": 12264 }, { "epoch": 42.293103448275865, "grad_norm": 0.8898783922195435, "learning_rate": 2.769655172413793e-05, "loss": 0.1195, "step": 12265 }, { "epoch": 42.296551724137935, "grad_norm": 0.6887409090995789, "learning_rate": 2.769609195402299e-05, "loss": 0.1061, "step": 12266 }, { "epoch": 42.3, "grad_norm": 0.5151865482330322, "learning_rate": 2.7695632183908048e-05, "loss": 0.1237, "step": 12267 }, { "epoch": 42.30344827586207, "grad_norm": 0.5057904124259949, "learning_rate": 2.7695172413793103e-05, "loss": 0.1308, "step": 12268 }, { "epoch": 42.30689655172414, "grad_norm": 0.5378608107566833, "learning_rate": 2.7694712643678162e-05, "loss": 0.1211, "step": 12269 }, { "epoch": 42.310344827586206, "grad_norm": 0.9591206908226013, "learning_rate": 2.769425287356322e-05, "loss": 0.1206, "step": 12270 }, { "epoch": 42.313793103448276, "grad_norm": 0.8281500339508057, "learning_rate": 2.7693793103448276e-05, "loss": 0.1134, "step": 12271 }, { "epoch": 42.317241379310346, "grad_norm": 0.6697930097579956, "learning_rate": 2.7693333333333335e-05, "loss": 0.1094, "step": 12272 }, { "epoch": 42.320689655172416, "grad_norm": 0.7397933006286621, "learning_rate": 2.769287356321839e-05, "loss": 0.0947, "step": 12273 }, { "epoch": 42.324137931034485, "grad_norm": 1.4610857963562012, "learning_rate": 2.7692413793103452e-05, "loss": 0.1182, "step": 12274 }, { "epoch": 42.327586206896555, "grad_norm": 0.780810534954071, "learning_rate": 2.7691954022988507e-05, "loss": 0.0983, "step": 12275 }, { "epoch": 42.33103448275862, "grad_norm": 0.7199638485908508, "learning_rate": 2.7691494252873563e-05, "loss": 0.0958, "step": 12276 }, { "epoch": 42.33448275862069, "grad_norm": 0.8544325828552246, "learning_rate": 2.769103448275862e-05, "loss": 0.092, "step": 12277 }, { "epoch": 42.33793103448276, "grad_norm": 0.7095324397087097, "learning_rate": 2.769057471264368e-05, "loss": 0.0827, "step": 12278 }, { "epoch": 42.34137931034483, "grad_norm": Infinity, "learning_rate": 2.769057471264368e-05, "loss": 0.0948, "step": 12279 }, { "epoch": 42.3448275862069, "grad_norm": 1.9553078413009644, "learning_rate": 2.7690114942528735e-05, "loss": 0.1249, "step": 12280 }, { "epoch": 42.34827586206897, "grad_norm": 0.8538755774497986, "learning_rate": 2.7689655172413794e-05, "loss": 0.1917, "step": 12281 }, { "epoch": 42.351724137931036, "grad_norm": 0.4669039249420166, "learning_rate": 2.768919540229885e-05, "loss": 0.1511, "step": 12282 }, { "epoch": 42.355172413793106, "grad_norm": 0.5411926507949829, "learning_rate": 2.768873563218391e-05, "loss": 0.1623, "step": 12283 }, { "epoch": 42.358620689655176, "grad_norm": 0.6145544052124023, "learning_rate": 2.7688275862068967e-05, "loss": 0.136, "step": 12284 }, { "epoch": 42.36206896551724, "grad_norm": 0.5223448276519775, "learning_rate": 2.7687816091954022e-05, "loss": 0.1602, "step": 12285 }, { "epoch": 42.36551724137931, "grad_norm": 2.0380306243896484, "learning_rate": 2.768735632183908e-05, "loss": 0.1319, "step": 12286 }, { "epoch": 42.36896551724138, "grad_norm": 0.5194566249847412, "learning_rate": 2.768689655172414e-05, "loss": 0.1377, "step": 12287 }, { "epoch": 42.37241379310345, "grad_norm": 0.5135104060173035, "learning_rate": 2.7686436781609198e-05, "loss": 0.1245, "step": 12288 }, { "epoch": 42.37586206896552, "grad_norm": 0.9097325801849365, "learning_rate": 2.7685977011494253e-05, "loss": 0.1221, "step": 12289 }, { "epoch": 42.37931034482759, "grad_norm": 0.4846218526363373, "learning_rate": 2.768551724137931e-05, "loss": 0.1092, "step": 12290 }, { "epoch": 42.38275862068966, "grad_norm": 0.5502746105194092, "learning_rate": 2.768505747126437e-05, "loss": 0.1248, "step": 12291 }, { "epoch": 42.38620689655173, "grad_norm": 0.9966171979904175, "learning_rate": 2.7684597701149426e-05, "loss": 0.1129, "step": 12292 }, { "epoch": 42.389655172413796, "grad_norm": 0.5583094954490662, "learning_rate": 2.7684137931034485e-05, "loss": 0.1196, "step": 12293 }, { "epoch": 42.39310344827586, "grad_norm": 0.9678485989570618, "learning_rate": 2.768367816091954e-05, "loss": 0.1031, "step": 12294 }, { "epoch": 42.39655172413793, "grad_norm": 1.0910438299179077, "learning_rate": 2.76832183908046e-05, "loss": 0.1001, "step": 12295 }, { "epoch": 42.4, "grad_norm": 1.2865018844604492, "learning_rate": 2.7682758620689657e-05, "loss": 0.1232, "step": 12296 }, { "epoch": 42.40344827586207, "grad_norm": 0.6711270213127136, "learning_rate": 2.7682298850574713e-05, "loss": 0.1076, "step": 12297 }, { "epoch": 42.40689655172414, "grad_norm": 1.7754416465759277, "learning_rate": 2.768183908045977e-05, "loss": 0.1202, "step": 12298 }, { "epoch": 42.41034482758621, "grad_norm": 0.7308286428451538, "learning_rate": 2.768137931034483e-05, "loss": 0.1173, "step": 12299 }, { "epoch": 42.41379310344828, "grad_norm": 0.6135823130607605, "learning_rate": 2.7680919540229885e-05, "loss": 0.097, "step": 12300 }, { "epoch": 42.41724137931035, "grad_norm": 0.5591568946838379, "learning_rate": 2.7680459770114944e-05, "loss": 0.0767, "step": 12301 }, { "epoch": 42.42068965517242, "grad_norm": 1.1470907926559448, "learning_rate": 2.768e-05, "loss": 0.0792, "step": 12302 }, { "epoch": 42.42413793103448, "grad_norm": 0.8879388570785522, "learning_rate": 2.767954022988506e-05, "loss": 0.0902, "step": 12303 }, { "epoch": 42.42758620689655, "grad_norm": 2.779059648513794, "learning_rate": 2.7679080459770117e-05, "loss": 0.1007, "step": 12304 }, { "epoch": 42.43103448275862, "grad_norm": 1.9503741264343262, "learning_rate": 2.7678620689655172e-05, "loss": 0.1253, "step": 12305 }, { "epoch": 42.43448275862069, "grad_norm": 1.141107439994812, "learning_rate": 2.767816091954023e-05, "loss": 0.2048, "step": 12306 }, { "epoch": 42.43793103448276, "grad_norm": 0.5600235462188721, "learning_rate": 2.767770114942529e-05, "loss": 0.1726, "step": 12307 }, { "epoch": 42.44137931034483, "grad_norm": 0.5383155345916748, "learning_rate": 2.7677241379310345e-05, "loss": 0.1531, "step": 12308 }, { "epoch": 42.4448275862069, "grad_norm": 0.7156419157981873, "learning_rate": 2.7676781609195403e-05, "loss": 0.1494, "step": 12309 }, { "epoch": 42.44827586206897, "grad_norm": 0.938689649105072, "learning_rate": 2.767632183908046e-05, "loss": 0.1454, "step": 12310 }, { "epoch": 42.45172413793104, "grad_norm": 0.6941089034080505, "learning_rate": 2.767586206896552e-05, "loss": 0.1415, "step": 12311 }, { "epoch": 42.4551724137931, "grad_norm": 0.6894468665122986, "learning_rate": 2.7675402298850576e-05, "loss": 0.1491, "step": 12312 }, { "epoch": 42.45862068965517, "grad_norm": 0.47965243458747864, "learning_rate": 2.767494252873563e-05, "loss": 0.1437, "step": 12313 }, { "epoch": 42.46206896551724, "grad_norm": 0.6213566660881042, "learning_rate": 2.767448275862069e-05, "loss": 0.1369, "step": 12314 }, { "epoch": 42.46551724137931, "grad_norm": 0.7234206795692444, "learning_rate": 2.767402298850575e-05, "loss": 0.1252, "step": 12315 }, { "epoch": 42.46896551724138, "grad_norm": 0.5491325855255127, "learning_rate": 2.7673563218390807e-05, "loss": 0.1333, "step": 12316 }, { "epoch": 42.47241379310345, "grad_norm": 0.6321347951889038, "learning_rate": 2.7673103448275863e-05, "loss": 0.1342, "step": 12317 }, { "epoch": 42.47586206896552, "grad_norm": 0.6357162594795227, "learning_rate": 2.7672643678160918e-05, "loss": 0.1273, "step": 12318 }, { "epoch": 42.47931034482759, "grad_norm": 0.5733805894851685, "learning_rate": 2.767218390804598e-05, "loss": 0.1099, "step": 12319 }, { "epoch": 42.48275862068966, "grad_norm": 1.4942951202392578, "learning_rate": 2.7671724137931035e-05, "loss": 0.1201, "step": 12320 }, { "epoch": 42.48620689655172, "grad_norm": 1.073393702507019, "learning_rate": 2.7671264367816094e-05, "loss": 0.1195, "step": 12321 }, { "epoch": 42.48965517241379, "grad_norm": 0.6567122936248779, "learning_rate": 2.767080459770115e-05, "loss": 0.1085, "step": 12322 }, { "epoch": 42.49310344827586, "grad_norm": 0.7082690000534058, "learning_rate": 2.7670344827586208e-05, "loss": 0.0983, "step": 12323 }, { "epoch": 42.49655172413793, "grad_norm": 1.068361759185791, "learning_rate": 2.7669885057471267e-05, "loss": 0.1105, "step": 12324 }, { "epoch": 42.5, "grad_norm": 0.8533188700675964, "learning_rate": 2.7669425287356322e-05, "loss": 0.1035, "step": 12325 }, { "epoch": 42.50344827586207, "grad_norm": 2.092324733734131, "learning_rate": 2.766896551724138e-05, "loss": 0.1077, "step": 12326 }, { "epoch": 42.50689655172414, "grad_norm": 0.899259626865387, "learning_rate": 2.766850574712644e-05, "loss": 0.0854, "step": 12327 }, { "epoch": 42.51034482758621, "grad_norm": 0.8427188396453857, "learning_rate": 2.7668045977011495e-05, "loss": 0.0927, "step": 12328 }, { "epoch": 42.51379310344828, "grad_norm": 0.7925485968589783, "learning_rate": 2.7667586206896553e-05, "loss": 0.101, "step": 12329 }, { "epoch": 42.51724137931034, "grad_norm": 0.9041317105293274, "learning_rate": 2.766712643678161e-05, "loss": 0.1144, "step": 12330 }, { "epoch": 42.52068965517241, "grad_norm": 0.8858479261398315, "learning_rate": 2.766666666666667e-05, "loss": 0.1902, "step": 12331 }, { "epoch": 42.52413793103448, "grad_norm": 0.531556248664856, "learning_rate": 2.7666206896551726e-05, "loss": 0.1756, "step": 12332 }, { "epoch": 42.52758620689655, "grad_norm": 0.7320764064788818, "learning_rate": 2.766574712643678e-05, "loss": 0.1824, "step": 12333 }, { "epoch": 42.53103448275862, "grad_norm": 0.5350877046585083, "learning_rate": 2.766528735632184e-05, "loss": 0.1672, "step": 12334 }, { "epoch": 42.53448275862069, "grad_norm": 0.6857014894485474, "learning_rate": 2.76648275862069e-05, "loss": 0.1622, "step": 12335 }, { "epoch": 42.53793103448276, "grad_norm": 0.6282629370689392, "learning_rate": 2.7664367816091957e-05, "loss": 0.1449, "step": 12336 }, { "epoch": 42.54137931034483, "grad_norm": 0.6359657645225525, "learning_rate": 2.7663908045977013e-05, "loss": 0.1544, "step": 12337 }, { "epoch": 42.5448275862069, "grad_norm": 0.526971161365509, "learning_rate": 2.7663448275862068e-05, "loss": 0.1333, "step": 12338 }, { "epoch": 42.54827586206896, "grad_norm": 0.45664873719215393, "learning_rate": 2.766298850574713e-05, "loss": 0.1321, "step": 12339 }, { "epoch": 42.55172413793103, "grad_norm": 0.5623223185539246, "learning_rate": 2.7662528735632185e-05, "loss": 0.1314, "step": 12340 }, { "epoch": 42.5551724137931, "grad_norm": 0.6999375224113464, "learning_rate": 2.766206896551724e-05, "loss": 0.1349, "step": 12341 }, { "epoch": 42.55862068965517, "grad_norm": 0.8193678259849548, "learning_rate": 2.76616091954023e-05, "loss": 0.1304, "step": 12342 }, { "epoch": 42.56206896551724, "grad_norm": 0.5906043648719788, "learning_rate": 2.7661149425287358e-05, "loss": 0.1198, "step": 12343 }, { "epoch": 42.56551724137931, "grad_norm": 0.5983156561851501, "learning_rate": 2.7660689655172417e-05, "loss": 0.1181, "step": 12344 }, { "epoch": 42.56896551724138, "grad_norm": 0.5836758613586426, "learning_rate": 2.7660229885057472e-05, "loss": 0.1104, "step": 12345 }, { "epoch": 42.57241379310345, "grad_norm": 0.6678853034973145, "learning_rate": 2.7659770114942527e-05, "loss": 0.1286, "step": 12346 }, { "epoch": 42.57586206896552, "grad_norm": 1.0233614444732666, "learning_rate": 2.7659310344827586e-05, "loss": 0.1063, "step": 12347 }, { "epoch": 42.57931034482758, "grad_norm": 0.5883191227912903, "learning_rate": 2.7658850574712644e-05, "loss": 0.0927, "step": 12348 }, { "epoch": 42.58275862068965, "grad_norm": 0.7339363098144531, "learning_rate": 2.7658390804597703e-05, "loss": 0.0958, "step": 12349 }, { "epoch": 42.58620689655172, "grad_norm": 0.584865927696228, "learning_rate": 2.765793103448276e-05, "loss": 0.0861, "step": 12350 }, { "epoch": 42.58965517241379, "grad_norm": 0.6941563487052917, "learning_rate": 2.7657471264367814e-05, "loss": 0.105, "step": 12351 }, { "epoch": 42.59310344827586, "grad_norm": 0.8007941246032715, "learning_rate": 2.7657011494252876e-05, "loss": 0.0851, "step": 12352 }, { "epoch": 42.59655172413793, "grad_norm": 0.7304241061210632, "learning_rate": 2.765655172413793e-05, "loss": 0.0964, "step": 12353 }, { "epoch": 42.6, "grad_norm": 1.1517338752746582, "learning_rate": 2.765609195402299e-05, "loss": 0.0752, "step": 12354 }, { "epoch": 42.60344827586207, "grad_norm": 1.0046385526657104, "learning_rate": 2.7655632183908045e-05, "loss": 0.1299, "step": 12355 }, { "epoch": 42.60689655172414, "grad_norm": 0.912848711013794, "learning_rate": 2.7655172413793104e-05, "loss": 0.1976, "step": 12356 }, { "epoch": 42.610344827586204, "grad_norm": 0.5581660866737366, "learning_rate": 2.7654712643678162e-05, "loss": 0.1715, "step": 12357 }, { "epoch": 42.61379310344827, "grad_norm": 0.6890144944190979, "learning_rate": 2.7654252873563218e-05, "loss": 0.1573, "step": 12358 }, { "epoch": 42.61724137931034, "grad_norm": 0.5898194313049316, "learning_rate": 2.7653793103448276e-05, "loss": 0.1602, "step": 12359 }, { "epoch": 42.62068965517241, "grad_norm": 0.8196572065353394, "learning_rate": 2.7653333333333335e-05, "loss": 0.1371, "step": 12360 }, { "epoch": 42.62413793103448, "grad_norm": 0.5661188364028931, "learning_rate": 2.765287356321839e-05, "loss": 0.1515, "step": 12361 }, { "epoch": 42.62758620689655, "grad_norm": 0.8779640793800354, "learning_rate": 2.765241379310345e-05, "loss": 0.159, "step": 12362 }, { "epoch": 42.63103448275862, "grad_norm": 0.8723864555358887, "learning_rate": 2.7651954022988504e-05, "loss": 0.1333, "step": 12363 }, { "epoch": 42.63448275862069, "grad_norm": 0.9635327458381653, "learning_rate": 2.7651494252873566e-05, "loss": 0.1321, "step": 12364 }, { "epoch": 42.63793103448276, "grad_norm": 0.6513822674751282, "learning_rate": 2.7651034482758622e-05, "loss": 0.1193, "step": 12365 }, { "epoch": 42.641379310344824, "grad_norm": 0.6542658805847168, "learning_rate": 2.7650574712643677e-05, "loss": 0.1198, "step": 12366 }, { "epoch": 42.644827586206894, "grad_norm": 0.9672492742538452, "learning_rate": 2.7650114942528736e-05, "loss": 0.1501, "step": 12367 }, { "epoch": 42.648275862068964, "grad_norm": 0.6129736304283142, "learning_rate": 2.7649655172413794e-05, "loss": 0.1094, "step": 12368 }, { "epoch": 42.65172413793103, "grad_norm": 0.650691568851471, "learning_rate": 2.764919540229885e-05, "loss": 0.0982, "step": 12369 }, { "epoch": 42.6551724137931, "grad_norm": 0.8674739003181458, "learning_rate": 2.764873563218391e-05, "loss": 0.105, "step": 12370 }, { "epoch": 42.65862068965517, "grad_norm": 0.6859492063522339, "learning_rate": 2.7648275862068964e-05, "loss": 0.1018, "step": 12371 }, { "epoch": 42.66206896551724, "grad_norm": 6.391971588134766, "learning_rate": 2.7647816091954026e-05, "loss": 0.1064, "step": 12372 }, { "epoch": 42.66551724137931, "grad_norm": 1.123070240020752, "learning_rate": 2.764735632183908e-05, "loss": 0.0966, "step": 12373 }, { "epoch": 42.66896551724138, "grad_norm": 0.6556448936462402, "learning_rate": 2.7646896551724136e-05, "loss": 0.1002, "step": 12374 }, { "epoch": 42.672413793103445, "grad_norm": 0.8712849020957947, "learning_rate": 2.7646436781609195e-05, "loss": 0.1194, "step": 12375 }, { "epoch": 42.675862068965515, "grad_norm": 0.7270256876945496, "learning_rate": 2.7645977011494254e-05, "loss": 0.102, "step": 12376 }, { "epoch": 42.679310344827584, "grad_norm": 0.7862454056739807, "learning_rate": 2.7645517241379312e-05, "loss": 0.084, "step": 12377 }, { "epoch": 42.682758620689654, "grad_norm": 0.6514825224876404, "learning_rate": 2.7645057471264368e-05, "loss": 0.093, "step": 12378 }, { "epoch": 42.686206896551724, "grad_norm": 0.6954602003097534, "learning_rate": 2.7644597701149423e-05, "loss": 0.0958, "step": 12379 }, { "epoch": 42.689655172413794, "grad_norm": 1.64128577709198, "learning_rate": 2.7644137931034485e-05, "loss": 0.1077, "step": 12380 }, { "epoch": 42.69310344827586, "grad_norm": 1.0550535917282104, "learning_rate": 2.764367816091954e-05, "loss": 0.1988, "step": 12381 }, { "epoch": 42.69655172413793, "grad_norm": 1.0305176973342896, "learning_rate": 2.76432183908046e-05, "loss": 0.1635, "step": 12382 }, { "epoch": 42.7, "grad_norm": 0.7299582362174988, "learning_rate": 2.7642758620689654e-05, "loss": 0.164, "step": 12383 }, { "epoch": 42.703448275862065, "grad_norm": 0.6596978902816772, "learning_rate": 2.7642298850574713e-05, "loss": 0.1631, "step": 12384 }, { "epoch": 42.706896551724135, "grad_norm": 1.2943531274795532, "learning_rate": 2.764183908045977e-05, "loss": 0.1519, "step": 12385 }, { "epoch": 42.710344827586205, "grad_norm": 0.5535593628883362, "learning_rate": 2.7641379310344827e-05, "loss": 0.1468, "step": 12386 }, { "epoch": 42.713793103448275, "grad_norm": 1.2994685173034668, "learning_rate": 2.7640919540229886e-05, "loss": 0.1688, "step": 12387 }, { "epoch": 42.717241379310344, "grad_norm": 0.6057288646697998, "learning_rate": 2.7640459770114944e-05, "loss": 0.1468, "step": 12388 }, { "epoch": 42.720689655172414, "grad_norm": 1.2057924270629883, "learning_rate": 2.764e-05, "loss": 0.1271, "step": 12389 }, { "epoch": 42.724137931034484, "grad_norm": 0.5587315559387207, "learning_rate": 2.763954022988506e-05, "loss": 0.1423, "step": 12390 }, { "epoch": 42.727586206896554, "grad_norm": 0.6339237689971924, "learning_rate": 2.7639080459770114e-05, "loss": 0.1393, "step": 12391 }, { "epoch": 42.73103448275862, "grad_norm": 0.7360854744911194, "learning_rate": 2.7638620689655176e-05, "loss": 0.1167, "step": 12392 }, { "epoch": 42.734482758620686, "grad_norm": 0.6951438784599304, "learning_rate": 2.763816091954023e-05, "loss": 0.1176, "step": 12393 }, { "epoch": 42.737931034482756, "grad_norm": 0.5324638485908508, "learning_rate": 2.7637701149425286e-05, "loss": 0.1097, "step": 12394 }, { "epoch": 42.741379310344826, "grad_norm": 0.5147148370742798, "learning_rate": 2.7637241379310345e-05, "loss": 0.1203, "step": 12395 }, { "epoch": 42.744827586206895, "grad_norm": 0.581943154335022, "learning_rate": 2.7636781609195404e-05, "loss": 0.1032, "step": 12396 }, { "epoch": 42.748275862068965, "grad_norm": 0.6139575242996216, "learning_rate": 2.763632183908046e-05, "loss": 0.1027, "step": 12397 }, { "epoch": 42.751724137931035, "grad_norm": 1.1132451295852661, "learning_rate": 2.7635862068965518e-05, "loss": 0.1066, "step": 12398 }, { "epoch": 42.755172413793105, "grad_norm": 0.8477392792701721, "learning_rate": 2.7635402298850573e-05, "loss": 0.1132, "step": 12399 }, { "epoch": 42.758620689655174, "grad_norm": 0.6008499264717102, "learning_rate": 2.7634942528735635e-05, "loss": 0.0906, "step": 12400 }, { "epoch": 42.762068965517244, "grad_norm": 0.6274664998054504, "learning_rate": 2.763448275862069e-05, "loss": 0.0899, "step": 12401 }, { "epoch": 42.765517241379314, "grad_norm": 0.6850747466087341, "learning_rate": 2.7634022988505746e-05, "loss": 0.0848, "step": 12402 }, { "epoch": 42.76896551724138, "grad_norm": 0.7779936790466309, "learning_rate": 2.7633563218390804e-05, "loss": 0.1001, "step": 12403 }, { "epoch": 42.772413793103446, "grad_norm": 0.7306268215179443, "learning_rate": 2.7633103448275863e-05, "loss": 0.0944, "step": 12404 }, { "epoch": 42.775862068965516, "grad_norm": 3.456782341003418, "learning_rate": 2.763264367816092e-05, "loss": 0.1292, "step": 12405 }, { "epoch": 42.779310344827586, "grad_norm": 0.672888457775116, "learning_rate": 2.7632183908045977e-05, "loss": 0.1931, "step": 12406 }, { "epoch": 42.782758620689656, "grad_norm": 0.5845856666564941, "learning_rate": 2.7631724137931032e-05, "loss": 0.1562, "step": 12407 }, { "epoch": 42.786206896551725, "grad_norm": 0.9147916436195374, "learning_rate": 2.7631264367816094e-05, "loss": 0.1603, "step": 12408 }, { "epoch": 42.789655172413795, "grad_norm": 0.5879402160644531, "learning_rate": 2.763080459770115e-05, "loss": 0.1766, "step": 12409 }, { "epoch": 42.793103448275865, "grad_norm": 0.5182527899742126, "learning_rate": 2.7630344827586208e-05, "loss": 0.161, "step": 12410 }, { "epoch": 42.796551724137935, "grad_norm": 0.7032468914985657, "learning_rate": 2.7629885057471264e-05, "loss": 0.1377, "step": 12411 }, { "epoch": 42.8, "grad_norm": 0.5114709734916687, "learning_rate": 2.7629425287356322e-05, "loss": 0.139, "step": 12412 }, { "epoch": 42.80344827586207, "grad_norm": 0.7635828852653503, "learning_rate": 2.762896551724138e-05, "loss": 0.1298, "step": 12413 }, { "epoch": 42.80689655172414, "grad_norm": 0.598565399646759, "learning_rate": 2.7628505747126436e-05, "loss": 0.1143, "step": 12414 }, { "epoch": 42.810344827586206, "grad_norm": 0.9309161901473999, "learning_rate": 2.7628045977011495e-05, "loss": 0.1381, "step": 12415 }, { "epoch": 42.813793103448276, "grad_norm": 0.6073570847511292, "learning_rate": 2.7627586206896554e-05, "loss": 0.1287, "step": 12416 }, { "epoch": 42.817241379310346, "grad_norm": 1.8658616542816162, "learning_rate": 2.762712643678161e-05, "loss": 0.1287, "step": 12417 }, { "epoch": 42.820689655172416, "grad_norm": 0.6564040184020996, "learning_rate": 2.7626666666666668e-05, "loss": 0.1243, "step": 12418 }, { "epoch": 42.824137931034485, "grad_norm": 0.7970454692840576, "learning_rate": 2.7626206896551723e-05, "loss": 0.119, "step": 12419 }, { "epoch": 42.827586206896555, "grad_norm": 0.7526986002922058, "learning_rate": 2.7625747126436785e-05, "loss": 0.1255, "step": 12420 }, { "epoch": 42.83103448275862, "grad_norm": 0.7218035459518433, "learning_rate": 2.762528735632184e-05, "loss": 0.1069, "step": 12421 }, { "epoch": 42.83448275862069, "grad_norm": 0.6880247592926025, "learning_rate": 2.7624827586206896e-05, "loss": 0.1121, "step": 12422 }, { "epoch": 42.83793103448276, "grad_norm": 0.6599641442298889, "learning_rate": 2.7624367816091954e-05, "loss": 0.1016, "step": 12423 }, { "epoch": 42.84137931034483, "grad_norm": 0.8341142535209656, "learning_rate": 2.7623908045977013e-05, "loss": 0.0911, "step": 12424 }, { "epoch": 42.8448275862069, "grad_norm": 1.062445878982544, "learning_rate": 2.762344827586207e-05, "loss": 0.0826, "step": 12425 }, { "epoch": 42.84827586206897, "grad_norm": 1.0385099649429321, "learning_rate": 2.7622988505747127e-05, "loss": 0.0855, "step": 12426 }, { "epoch": 42.851724137931036, "grad_norm": 0.7486385107040405, "learning_rate": 2.7622528735632182e-05, "loss": 0.0929, "step": 12427 }, { "epoch": 42.855172413793106, "grad_norm": 0.8621944785118103, "learning_rate": 2.7622068965517244e-05, "loss": 0.0914, "step": 12428 }, { "epoch": 42.858620689655176, "grad_norm": 1.0994873046875, "learning_rate": 2.76216091954023e-05, "loss": 0.0939, "step": 12429 }, { "epoch": 42.86206896551724, "grad_norm": 1.5918235778808594, "learning_rate": 2.7621149425287355e-05, "loss": 0.1557, "step": 12430 }, { "epoch": 42.86551724137931, "grad_norm": 0.6028462648391724, "learning_rate": 2.7620689655172413e-05, "loss": 0.2003, "step": 12431 }, { "epoch": 42.86896551724138, "grad_norm": 0.5532031655311584, "learning_rate": 2.7620229885057472e-05, "loss": 0.1695, "step": 12432 }, { "epoch": 42.87241379310345, "grad_norm": 1.2067397832870483, "learning_rate": 2.761977011494253e-05, "loss": 0.1639, "step": 12433 }, { "epoch": 42.87586206896552, "grad_norm": 2.227405071258545, "learning_rate": 2.7619310344827586e-05, "loss": 0.1297, "step": 12434 }, { "epoch": 42.87931034482759, "grad_norm": 0.9159512519836426, "learning_rate": 2.761885057471264e-05, "loss": 0.1279, "step": 12435 }, { "epoch": 42.88275862068966, "grad_norm": 0.6326916217803955, "learning_rate": 2.7618390804597704e-05, "loss": 0.1296, "step": 12436 }, { "epoch": 42.88620689655173, "grad_norm": 0.645932674407959, "learning_rate": 2.761793103448276e-05, "loss": 0.1346, "step": 12437 }, { "epoch": 42.889655172413796, "grad_norm": 0.5505738258361816, "learning_rate": 2.7617471264367818e-05, "loss": 0.1358, "step": 12438 }, { "epoch": 42.89310344827586, "grad_norm": 0.6252630949020386, "learning_rate": 2.7617011494252873e-05, "loss": 0.1258, "step": 12439 }, { "epoch": 42.89655172413793, "grad_norm": 0.6087584495544434, "learning_rate": 2.761655172413793e-05, "loss": 0.1264, "step": 12440 }, { "epoch": 42.9, "grad_norm": 0.6099993586540222, "learning_rate": 2.761609195402299e-05, "loss": 0.1335, "step": 12441 }, { "epoch": 42.90344827586207, "grad_norm": 0.6180718541145325, "learning_rate": 2.7615632183908045e-05, "loss": 0.1287, "step": 12442 }, { "epoch": 42.90689655172414, "grad_norm": 0.6735804677009583, "learning_rate": 2.7615172413793104e-05, "loss": 0.1018, "step": 12443 }, { "epoch": 42.91034482758621, "grad_norm": 0.8710311651229858, "learning_rate": 2.7614712643678163e-05, "loss": 0.1173, "step": 12444 }, { "epoch": 42.91379310344828, "grad_norm": 0.5103098154067993, "learning_rate": 2.7614252873563218e-05, "loss": 0.1076, "step": 12445 }, { "epoch": 42.91724137931035, "grad_norm": 0.7663959860801697, "learning_rate": 2.7613793103448277e-05, "loss": 0.0998, "step": 12446 }, { "epoch": 42.92068965517242, "grad_norm": 1.0252004861831665, "learning_rate": 2.7613333333333332e-05, "loss": 0.1106, "step": 12447 }, { "epoch": 42.92413793103448, "grad_norm": 0.7117668390274048, "learning_rate": 2.7612873563218394e-05, "loss": 0.1234, "step": 12448 }, { "epoch": 42.92758620689655, "grad_norm": 0.9323391318321228, "learning_rate": 2.761241379310345e-05, "loss": 0.1165, "step": 12449 }, { "epoch": 42.93103448275862, "grad_norm": 0.8793924450874329, "learning_rate": 2.7611954022988505e-05, "loss": 0.0988, "step": 12450 }, { "epoch": 42.93448275862069, "grad_norm": 1.238680362701416, "learning_rate": 2.7611494252873563e-05, "loss": 0.093, "step": 12451 }, { "epoch": 42.93793103448276, "grad_norm": 0.7309713959693909, "learning_rate": 2.7611034482758622e-05, "loss": 0.0968, "step": 12452 }, { "epoch": 42.94137931034483, "grad_norm": 0.9936012625694275, "learning_rate": 2.761057471264368e-05, "loss": 0.0902, "step": 12453 }, { "epoch": 42.9448275862069, "grad_norm": 0.8661735653877258, "learning_rate": 2.7610114942528736e-05, "loss": 0.0963, "step": 12454 }, { "epoch": 42.94827586206897, "grad_norm": 1.3662736415863037, "learning_rate": 2.760965517241379e-05, "loss": 0.1139, "step": 12455 }, { "epoch": 42.95172413793104, "grad_norm": 0.6826170682907104, "learning_rate": 2.7609195402298853e-05, "loss": 0.1926, "step": 12456 }, { "epoch": 42.9551724137931, "grad_norm": 0.5050478577613831, "learning_rate": 2.760873563218391e-05, "loss": 0.1436, "step": 12457 }, { "epoch": 42.95862068965517, "grad_norm": 0.9991623759269714, "learning_rate": 2.7608275862068964e-05, "loss": 0.1588, "step": 12458 }, { "epoch": 42.96206896551724, "grad_norm": 0.5344852209091187, "learning_rate": 2.7607816091954023e-05, "loss": 0.1412, "step": 12459 }, { "epoch": 42.96551724137931, "grad_norm": 1.7255173921585083, "learning_rate": 2.760735632183908e-05, "loss": 0.1405, "step": 12460 }, { "epoch": 42.96896551724138, "grad_norm": 0.7861097455024719, "learning_rate": 2.760689655172414e-05, "loss": 0.1252, "step": 12461 }, { "epoch": 42.97241379310345, "grad_norm": 0.970613956451416, "learning_rate": 2.7606436781609195e-05, "loss": 0.1277, "step": 12462 }, { "epoch": 42.97586206896552, "grad_norm": 0.784081757068634, "learning_rate": 2.760597701149425e-05, "loss": 0.126, "step": 12463 }, { "epoch": 42.97931034482759, "grad_norm": 0.5436315536499023, "learning_rate": 2.7605517241379313e-05, "loss": 0.1116, "step": 12464 }, { "epoch": 42.98275862068966, "grad_norm": 0.8146080374717712, "learning_rate": 2.7605057471264368e-05, "loss": 0.1079, "step": 12465 }, { "epoch": 42.98620689655172, "grad_norm": 0.6820071339607239, "learning_rate": 2.7604597701149427e-05, "loss": 0.1372, "step": 12466 }, { "epoch": 42.98965517241379, "grad_norm": 1.244184970855713, "learning_rate": 2.7604137931034482e-05, "loss": 0.0953, "step": 12467 }, { "epoch": 42.99310344827586, "grad_norm": 0.8693459033966064, "learning_rate": 2.760367816091954e-05, "loss": 0.0923, "step": 12468 }, { "epoch": 42.99655172413793, "grad_norm": 1.0966649055480957, "learning_rate": 2.76032183908046e-05, "loss": 0.1082, "step": 12469 }, { "epoch": 43.0, "grad_norm": 0.9600239992141724, "learning_rate": 2.7602758620689655e-05, "loss": 0.1457, "step": 12470 }, { "epoch": 43.00344827586207, "grad_norm": 1.1018973588943481, "learning_rate": 2.7602298850574713e-05, "loss": 0.2315, "step": 12471 }, { "epoch": 43.00689655172414, "grad_norm": 0.884218692779541, "learning_rate": 2.7601839080459772e-05, "loss": 0.1504, "step": 12472 }, { "epoch": 43.01034482758621, "grad_norm": 0.8445684909820557, "learning_rate": 2.7601379310344827e-05, "loss": 0.1584, "step": 12473 }, { "epoch": 43.01379310344828, "grad_norm": 0.71871018409729, "learning_rate": 2.7600919540229886e-05, "loss": 0.1398, "step": 12474 }, { "epoch": 43.01724137931034, "grad_norm": 0.6717864871025085, "learning_rate": 2.760045977011494e-05, "loss": 0.132, "step": 12475 }, { "epoch": 43.02068965517241, "grad_norm": 0.7986140847206116, "learning_rate": 2.7600000000000003e-05, "loss": 0.1278, "step": 12476 }, { "epoch": 43.02413793103448, "grad_norm": 0.6787200570106506, "learning_rate": 2.759954022988506e-05, "loss": 0.1299, "step": 12477 }, { "epoch": 43.02758620689655, "grad_norm": 0.5642905831336975, "learning_rate": 2.7599080459770114e-05, "loss": 0.1243, "step": 12478 }, { "epoch": 43.03103448275862, "grad_norm": 0.6468654274940491, "learning_rate": 2.7598620689655173e-05, "loss": 0.1145, "step": 12479 }, { "epoch": 43.03448275862069, "grad_norm": 1.4864609241485596, "learning_rate": 2.759816091954023e-05, "loss": 0.1152, "step": 12480 }, { "epoch": 43.03793103448276, "grad_norm": 0.7483205199241638, "learning_rate": 2.759770114942529e-05, "loss": 0.1144, "step": 12481 }, { "epoch": 43.04137931034483, "grad_norm": 0.5358518958091736, "learning_rate": 2.7597241379310345e-05, "loss": 0.1096, "step": 12482 }, { "epoch": 43.0448275862069, "grad_norm": 0.5000337362289429, "learning_rate": 2.75967816091954e-05, "loss": 0.1052, "step": 12483 }, { "epoch": 43.04827586206896, "grad_norm": 0.5467570424079895, "learning_rate": 2.7596321839080463e-05, "loss": 0.1098, "step": 12484 }, { "epoch": 43.05172413793103, "grad_norm": 1.5375444889068604, "learning_rate": 2.7595862068965518e-05, "loss": 0.1247, "step": 12485 }, { "epoch": 43.0551724137931, "grad_norm": 0.8951303362846375, "learning_rate": 2.7595402298850573e-05, "loss": 0.1045, "step": 12486 }, { "epoch": 43.05862068965517, "grad_norm": 0.5015122890472412, "learning_rate": 2.7594942528735632e-05, "loss": 0.0941, "step": 12487 }, { "epoch": 43.06206896551724, "grad_norm": 0.88801109790802, "learning_rate": 2.759448275862069e-05, "loss": 0.0947, "step": 12488 }, { "epoch": 43.06551724137931, "grad_norm": 0.8705257177352905, "learning_rate": 2.759402298850575e-05, "loss": 0.1018, "step": 12489 }, { "epoch": 43.06896551724138, "grad_norm": 0.9234997630119324, "learning_rate": 2.7593563218390805e-05, "loss": 0.0838, "step": 12490 }, { "epoch": 43.07241379310345, "grad_norm": 0.5555614233016968, "learning_rate": 2.759310344827586e-05, "loss": 0.0808, "step": 12491 }, { "epoch": 43.07586206896552, "grad_norm": 0.6640954613685608, "learning_rate": 2.7592643678160922e-05, "loss": 0.0671, "step": 12492 }, { "epoch": 43.07931034482758, "grad_norm": 0.7836798429489136, "learning_rate": 2.7592183908045977e-05, "loss": 0.0683, "step": 12493 }, { "epoch": 43.08275862068965, "grad_norm": 1.0442423820495605, "learning_rate": 2.7591724137931036e-05, "loss": 0.0905, "step": 12494 }, { "epoch": 43.08620689655172, "grad_norm": 1.2216980457305908, "learning_rate": 2.759126436781609e-05, "loss": 0.1332, "step": 12495 }, { "epoch": 43.08965517241379, "grad_norm": 0.5459175705909729, "learning_rate": 2.759080459770115e-05, "loss": 0.1827, "step": 12496 }, { "epoch": 43.09310344827586, "grad_norm": 1.0451064109802246, "learning_rate": 2.759034482758621e-05, "loss": 0.1613, "step": 12497 }, { "epoch": 43.09655172413793, "grad_norm": 1.2220501899719238, "learning_rate": 2.7589885057471264e-05, "loss": 0.1541, "step": 12498 }, { "epoch": 43.1, "grad_norm": 0.622886598110199, "learning_rate": 2.7589425287356323e-05, "loss": 0.1374, "step": 12499 }, { "epoch": 43.10344827586207, "grad_norm": 0.5442842245101929, "learning_rate": 2.758896551724138e-05, "loss": 0.1391, "step": 12500 }, { "epoch": 43.10689655172414, "grad_norm": 0.6182793974876404, "learning_rate": 2.7588505747126437e-05, "loss": 0.1388, "step": 12501 }, { "epoch": 43.110344827586204, "grad_norm": 0.5542934536933899, "learning_rate": 2.7588045977011495e-05, "loss": 0.1222, "step": 12502 }, { "epoch": 43.11379310344827, "grad_norm": 0.8160423040390015, "learning_rate": 2.758758620689655e-05, "loss": 0.1247, "step": 12503 }, { "epoch": 43.11724137931034, "grad_norm": 0.5584928393363953, "learning_rate": 2.7587126436781613e-05, "loss": 0.1387, "step": 12504 }, { "epoch": 43.12068965517241, "grad_norm": 1.1012860536575317, "learning_rate": 2.7586666666666668e-05, "loss": 0.1122, "step": 12505 }, { "epoch": 43.12413793103448, "grad_norm": 0.7504003047943115, "learning_rate": 2.7586206896551723e-05, "loss": 0.1053, "step": 12506 }, { "epoch": 43.12758620689655, "grad_norm": 0.5903267860412598, "learning_rate": 2.7585747126436782e-05, "loss": 0.0991, "step": 12507 }, { "epoch": 43.13103448275862, "grad_norm": 0.6466259360313416, "learning_rate": 2.758528735632184e-05, "loss": 0.1202, "step": 12508 }, { "epoch": 43.13448275862069, "grad_norm": 0.7533183097839355, "learning_rate": 2.75848275862069e-05, "loss": 0.1032, "step": 12509 }, { "epoch": 43.13793103448276, "grad_norm": 0.9822704195976257, "learning_rate": 2.7584367816091955e-05, "loss": 0.1189, "step": 12510 }, { "epoch": 43.141379310344824, "grad_norm": 0.718666136264801, "learning_rate": 2.758390804597701e-05, "loss": 0.0873, "step": 12511 }, { "epoch": 43.144827586206894, "grad_norm": 0.7975372672080994, "learning_rate": 2.7583448275862072e-05, "loss": 0.1006, "step": 12512 }, { "epoch": 43.148275862068964, "grad_norm": 0.6436412930488586, "learning_rate": 2.7582988505747127e-05, "loss": 0.0923, "step": 12513 }, { "epoch": 43.15172413793103, "grad_norm": 0.5486376285552979, "learning_rate": 2.7582528735632186e-05, "loss": 0.0967, "step": 12514 }, { "epoch": 43.1551724137931, "grad_norm": 0.9540050625801086, "learning_rate": 2.758206896551724e-05, "loss": 0.0758, "step": 12515 }, { "epoch": 43.15862068965517, "grad_norm": 0.679517924785614, "learning_rate": 2.75816091954023e-05, "loss": 0.0671, "step": 12516 }, { "epoch": 43.16206896551724, "grad_norm": 0.6609022617340088, "learning_rate": 2.758114942528736e-05, "loss": 0.0723, "step": 12517 }, { "epoch": 43.16551724137931, "grad_norm": 0.9930670261383057, "learning_rate": 2.7580689655172414e-05, "loss": 0.0663, "step": 12518 }, { "epoch": 43.16896551724138, "grad_norm": 2.2618706226348877, "learning_rate": 2.758022988505747e-05, "loss": 0.0726, "step": 12519 }, { "epoch": 43.172413793103445, "grad_norm": 3.05651593208313, "learning_rate": 2.757977011494253e-05, "loss": 0.1138, "step": 12520 }, { "epoch": 43.175862068965515, "grad_norm": 1.0515953302383423, "learning_rate": 2.7579310344827587e-05, "loss": 0.2181, "step": 12521 }, { "epoch": 43.179310344827584, "grad_norm": 0.4927392899990082, "learning_rate": 2.7578850574712645e-05, "loss": 0.1391, "step": 12522 }, { "epoch": 43.182758620689654, "grad_norm": 0.4992712736129761, "learning_rate": 2.75783908045977e-05, "loss": 0.1364, "step": 12523 }, { "epoch": 43.186206896551724, "grad_norm": 0.6681078672409058, "learning_rate": 2.757793103448276e-05, "loss": 0.1367, "step": 12524 }, { "epoch": 43.189655172413794, "grad_norm": 0.5244475603103638, "learning_rate": 2.7577471264367818e-05, "loss": 0.1317, "step": 12525 }, { "epoch": 43.19310344827586, "grad_norm": 0.6835941672325134, "learning_rate": 2.7577011494252873e-05, "loss": 0.1305, "step": 12526 }, { "epoch": 43.19655172413793, "grad_norm": 0.6074662804603577, "learning_rate": 2.7576551724137932e-05, "loss": 0.1312, "step": 12527 }, { "epoch": 43.2, "grad_norm": 0.750680148601532, "learning_rate": 2.757609195402299e-05, "loss": 0.1221, "step": 12528 }, { "epoch": 43.203448275862065, "grad_norm": 0.8562726378440857, "learning_rate": 2.7575632183908046e-05, "loss": 0.1264, "step": 12529 }, { "epoch": 43.206896551724135, "grad_norm": 0.5898932814598083, "learning_rate": 2.7575172413793105e-05, "loss": 0.1157, "step": 12530 }, { "epoch": 43.210344827586205, "grad_norm": 0.6681073904037476, "learning_rate": 2.757471264367816e-05, "loss": 0.1309, "step": 12531 }, { "epoch": 43.213793103448275, "grad_norm": 0.8679671287536621, "learning_rate": 2.7574252873563222e-05, "loss": 0.1145, "step": 12532 }, { "epoch": 43.217241379310344, "grad_norm": 0.5348373651504517, "learning_rate": 2.7573793103448277e-05, "loss": 0.1051, "step": 12533 }, { "epoch": 43.220689655172414, "grad_norm": 0.600523829460144, "learning_rate": 2.7573333333333332e-05, "loss": 0.1085, "step": 12534 }, { "epoch": 43.224137931034484, "grad_norm": 0.7946656942367554, "learning_rate": 2.757287356321839e-05, "loss": 0.0979, "step": 12535 }, { "epoch": 43.227586206896554, "grad_norm": 0.593420684337616, "learning_rate": 2.757241379310345e-05, "loss": 0.0928, "step": 12536 }, { "epoch": 43.23103448275862, "grad_norm": 0.8442454934120178, "learning_rate": 2.757195402298851e-05, "loss": 0.0935, "step": 12537 }, { "epoch": 43.234482758620686, "grad_norm": 0.6810393929481506, "learning_rate": 2.7571494252873564e-05, "loss": 0.0994, "step": 12538 }, { "epoch": 43.237931034482756, "grad_norm": 0.7766817212104797, "learning_rate": 2.757103448275862e-05, "loss": 0.084, "step": 12539 }, { "epoch": 43.241379310344826, "grad_norm": 1.9653230905532837, "learning_rate": 2.757057471264368e-05, "loss": 0.0865, "step": 12540 }, { "epoch": 43.244827586206895, "grad_norm": 1.1669747829437256, "learning_rate": 2.7570114942528736e-05, "loss": 0.0851, "step": 12541 }, { "epoch": 43.248275862068965, "grad_norm": 0.9057814478874207, "learning_rate": 2.7569655172413795e-05, "loss": 0.0715, "step": 12542 }, { "epoch": 43.251724137931035, "grad_norm": 0.7314331531524658, "learning_rate": 2.756919540229885e-05, "loss": 0.0892, "step": 12543 }, { "epoch": 43.255172413793105, "grad_norm": 0.67987459897995, "learning_rate": 2.756873563218391e-05, "loss": 0.0763, "step": 12544 }, { "epoch": 43.258620689655174, "grad_norm": 1.2625941038131714, "learning_rate": 2.7568275862068968e-05, "loss": 0.0939, "step": 12545 }, { "epoch": 43.262068965517244, "grad_norm": 0.6488187909126282, "learning_rate": 2.7567816091954023e-05, "loss": 0.1771, "step": 12546 }, { "epoch": 43.265517241379314, "grad_norm": 0.6080543398857117, "learning_rate": 2.756735632183908e-05, "loss": 0.1736, "step": 12547 }, { "epoch": 43.26896551724138, "grad_norm": 0.684212863445282, "learning_rate": 2.756689655172414e-05, "loss": 0.1563, "step": 12548 }, { "epoch": 43.272413793103446, "grad_norm": 0.736425518989563, "learning_rate": 2.7566436781609196e-05, "loss": 0.1297, "step": 12549 }, { "epoch": 43.275862068965516, "grad_norm": 0.7392458915710449, "learning_rate": 2.7565977011494254e-05, "loss": 0.136, "step": 12550 }, { "epoch": 43.279310344827586, "grad_norm": 1.0272949934005737, "learning_rate": 2.756551724137931e-05, "loss": 0.1249, "step": 12551 }, { "epoch": 43.282758620689656, "grad_norm": 0.7066290378570557, "learning_rate": 2.756505747126437e-05, "loss": 0.1205, "step": 12552 }, { "epoch": 43.286206896551725, "grad_norm": 0.6198564171791077, "learning_rate": 2.7564597701149427e-05, "loss": 0.118, "step": 12553 }, { "epoch": 43.289655172413795, "grad_norm": 0.9898940324783325, "learning_rate": 2.7564137931034482e-05, "loss": 0.1318, "step": 12554 }, { "epoch": 43.293103448275865, "grad_norm": 0.8510098457336426, "learning_rate": 2.756367816091954e-05, "loss": 0.1319, "step": 12555 }, { "epoch": 43.296551724137935, "grad_norm": 0.6859474778175354, "learning_rate": 2.75632183908046e-05, "loss": 0.1268, "step": 12556 }, { "epoch": 43.3, "grad_norm": 0.9047034382820129, "learning_rate": 2.7562758620689655e-05, "loss": 0.1143, "step": 12557 }, { "epoch": 43.30344827586207, "grad_norm": 0.76570063829422, "learning_rate": 2.7562298850574714e-05, "loss": 0.1042, "step": 12558 }, { "epoch": 43.30689655172414, "grad_norm": 0.5988571047782898, "learning_rate": 2.756183908045977e-05, "loss": 0.101, "step": 12559 }, { "epoch": 43.310344827586206, "grad_norm": 0.8364578485488892, "learning_rate": 2.756137931034483e-05, "loss": 0.0911, "step": 12560 }, { "epoch": 43.313793103448276, "grad_norm": 3.632071018218994, "learning_rate": 2.7560919540229886e-05, "loss": 0.104, "step": 12561 }, { "epoch": 43.317241379310346, "grad_norm": 0.8944108486175537, "learning_rate": 2.7560459770114942e-05, "loss": 0.0926, "step": 12562 }, { "epoch": 43.320689655172416, "grad_norm": 0.6433554291725159, "learning_rate": 2.756e-05, "loss": 0.1062, "step": 12563 }, { "epoch": 43.324137931034485, "grad_norm": 0.5234349370002747, "learning_rate": 2.755954022988506e-05, "loss": 0.0831, "step": 12564 }, { "epoch": 43.327586206896555, "grad_norm": 0.8704130053520203, "learning_rate": 2.7559080459770118e-05, "loss": 0.0877, "step": 12565 }, { "epoch": 43.33103448275862, "grad_norm": 0.5884206295013428, "learning_rate": 2.7558620689655173e-05, "loss": 0.0877, "step": 12566 }, { "epoch": 43.33448275862069, "grad_norm": 0.7223765254020691, "learning_rate": 2.755816091954023e-05, "loss": 0.0722, "step": 12567 }, { "epoch": 43.33793103448276, "grad_norm": 0.8837615251541138, "learning_rate": 2.755770114942529e-05, "loss": 0.0802, "step": 12568 }, { "epoch": 43.34137931034483, "grad_norm": 0.8307111859321594, "learning_rate": 2.7557241379310346e-05, "loss": 0.0899, "step": 12569 }, { "epoch": 43.3448275862069, "grad_norm": 0.9597994685173035, "learning_rate": 2.7556781609195404e-05, "loss": 0.1314, "step": 12570 }, { "epoch": 43.34827586206897, "grad_norm": 0.699899435043335, "learning_rate": 2.755632183908046e-05, "loss": 0.1675, "step": 12571 }, { "epoch": 43.351724137931036, "grad_norm": 0.625847578048706, "learning_rate": 2.755586206896552e-05, "loss": 0.132, "step": 12572 }, { "epoch": 43.355172413793106, "grad_norm": 0.5842390656471252, "learning_rate": 2.7555402298850577e-05, "loss": 0.1498, "step": 12573 }, { "epoch": 43.358620689655176, "grad_norm": 0.594301164150238, "learning_rate": 2.7554942528735632e-05, "loss": 0.1472, "step": 12574 }, { "epoch": 43.36206896551724, "grad_norm": 0.913621723651886, "learning_rate": 2.7554482758620688e-05, "loss": 0.1364, "step": 12575 }, { "epoch": 43.36551724137931, "grad_norm": 0.6527331471443176, "learning_rate": 2.755402298850575e-05, "loss": 0.1513, "step": 12576 }, { "epoch": 43.36896551724138, "grad_norm": 1.2692371606826782, "learning_rate": 2.7553563218390805e-05, "loss": 0.1329, "step": 12577 }, { "epoch": 43.37241379310345, "grad_norm": 0.5954597592353821, "learning_rate": 2.7553103448275864e-05, "loss": 0.1288, "step": 12578 }, { "epoch": 43.37586206896552, "grad_norm": 0.6175194382667542, "learning_rate": 2.755264367816092e-05, "loss": 0.115, "step": 12579 }, { "epoch": 43.37931034482759, "grad_norm": 0.453108549118042, "learning_rate": 2.7552183908045978e-05, "loss": 0.1071, "step": 12580 }, { "epoch": 43.38275862068966, "grad_norm": 0.6792948246002197, "learning_rate": 2.7551724137931036e-05, "loss": 0.0994, "step": 12581 }, { "epoch": 43.38620689655173, "grad_norm": 0.5822312235832214, "learning_rate": 2.755126436781609e-05, "loss": 0.1208, "step": 12582 }, { "epoch": 43.389655172413796, "grad_norm": 0.7040293216705322, "learning_rate": 2.755080459770115e-05, "loss": 0.0901, "step": 12583 }, { "epoch": 43.39310344827586, "grad_norm": 0.6335213780403137, "learning_rate": 2.755034482758621e-05, "loss": 0.1183, "step": 12584 }, { "epoch": 43.39655172413793, "grad_norm": 0.7158591151237488, "learning_rate": 2.7549885057471264e-05, "loss": 0.1031, "step": 12585 }, { "epoch": 43.4, "grad_norm": 0.66414874792099, "learning_rate": 2.7549425287356323e-05, "loss": 0.0996, "step": 12586 }, { "epoch": 43.40344827586207, "grad_norm": 1.775996208190918, "learning_rate": 2.7548965517241378e-05, "loss": 0.091, "step": 12587 }, { "epoch": 43.40689655172414, "grad_norm": 0.9467819333076477, "learning_rate": 2.754850574712644e-05, "loss": 0.1037, "step": 12588 }, { "epoch": 43.41034482758621, "grad_norm": 0.5181794762611389, "learning_rate": 2.7548045977011496e-05, "loss": 0.0742, "step": 12589 }, { "epoch": 43.41379310344828, "grad_norm": 0.6289264559745789, "learning_rate": 2.754758620689655e-05, "loss": 0.079, "step": 12590 }, { "epoch": 43.41724137931035, "grad_norm": 0.8784658312797546, "learning_rate": 2.754712643678161e-05, "loss": 0.0803, "step": 12591 }, { "epoch": 43.42068965517242, "grad_norm": 0.7244560122489929, "learning_rate": 2.754666666666667e-05, "loss": 0.0802, "step": 12592 }, { "epoch": 43.42413793103448, "grad_norm": 0.8983346819877625, "learning_rate": 2.7546206896551727e-05, "loss": 0.0973, "step": 12593 }, { "epoch": 43.42758620689655, "grad_norm": 0.9020401835441589, "learning_rate": 2.7545747126436782e-05, "loss": 0.099, "step": 12594 }, { "epoch": 43.43103448275862, "grad_norm": 2.868655204772949, "learning_rate": 2.7545287356321838e-05, "loss": 0.1532, "step": 12595 }, { "epoch": 43.43448275862069, "grad_norm": 0.7861863374710083, "learning_rate": 2.75448275862069e-05, "loss": 0.1915, "step": 12596 }, { "epoch": 43.43793103448276, "grad_norm": 1.4555798768997192, "learning_rate": 2.7544367816091955e-05, "loss": 0.1785, "step": 12597 }, { "epoch": 43.44137931034483, "grad_norm": 0.6247400641441345, "learning_rate": 2.7543908045977014e-05, "loss": 0.1652, "step": 12598 }, { "epoch": 43.4448275862069, "grad_norm": 0.7091150283813477, "learning_rate": 2.754344827586207e-05, "loss": 0.1472, "step": 12599 }, { "epoch": 43.44827586206897, "grad_norm": 0.521169126033783, "learning_rate": 2.7542988505747128e-05, "loss": 0.1542, "step": 12600 }, { "epoch": 43.45172413793104, "grad_norm": 0.5173097252845764, "learning_rate": 2.7542528735632186e-05, "loss": 0.1312, "step": 12601 }, { "epoch": 43.4551724137931, "grad_norm": 0.6901552081108093, "learning_rate": 2.754206896551724e-05, "loss": 0.1235, "step": 12602 }, { "epoch": 43.45862068965517, "grad_norm": 0.5915068984031677, "learning_rate": 2.75416091954023e-05, "loss": 0.1335, "step": 12603 }, { "epoch": 43.46206896551724, "grad_norm": 0.6613263487815857, "learning_rate": 2.754114942528736e-05, "loss": 0.119, "step": 12604 }, { "epoch": 43.46551724137931, "grad_norm": 0.7811642289161682, "learning_rate": 2.7540689655172414e-05, "loss": 0.1146, "step": 12605 }, { "epoch": 43.46896551724138, "grad_norm": 1.1116379499435425, "learning_rate": 2.7540229885057473e-05, "loss": 0.1027, "step": 12606 }, { "epoch": 43.47241379310345, "grad_norm": 1.3518704175949097, "learning_rate": 2.7539770114942528e-05, "loss": 0.1244, "step": 12607 }, { "epoch": 43.47586206896552, "grad_norm": 0.5341408848762512, "learning_rate": 2.7539310344827587e-05, "loss": 0.1066, "step": 12608 }, { "epoch": 43.47931034482759, "grad_norm": 0.8320196866989136, "learning_rate": 2.7538850574712646e-05, "loss": 0.1284, "step": 12609 }, { "epoch": 43.48275862068966, "grad_norm": 1.0705081224441528, "learning_rate": 2.75383908045977e-05, "loss": 0.0934, "step": 12610 }, { "epoch": 43.48620689655172, "grad_norm": 0.7676813006401062, "learning_rate": 2.753793103448276e-05, "loss": 0.0883, "step": 12611 }, { "epoch": 43.48965517241379, "grad_norm": 0.7167148590087891, "learning_rate": 2.7537471264367818e-05, "loss": 0.1092, "step": 12612 }, { "epoch": 43.49310344827586, "grad_norm": 1.0331329107284546, "learning_rate": 2.7537011494252874e-05, "loss": 0.0989, "step": 12613 }, { "epoch": 43.49655172413793, "grad_norm": 0.6743007302284241, "learning_rate": 2.7536551724137932e-05, "loss": 0.0845, "step": 12614 }, { "epoch": 43.5, "grad_norm": 0.7467735409736633, "learning_rate": 2.7536091954022988e-05, "loss": 0.0995, "step": 12615 }, { "epoch": 43.50344827586207, "grad_norm": 0.7226132154464722, "learning_rate": 2.753563218390805e-05, "loss": 0.0969, "step": 12616 }, { "epoch": 43.50689655172414, "grad_norm": 1.0832440853118896, "learning_rate": 2.7535172413793105e-05, "loss": 0.0893, "step": 12617 }, { "epoch": 43.51034482758621, "grad_norm": 1.0074599981307983, "learning_rate": 2.753471264367816e-05, "loss": 0.0892, "step": 12618 }, { "epoch": 43.51379310344828, "grad_norm": 1.390784502029419, "learning_rate": 2.753425287356322e-05, "loss": 0.1098, "step": 12619 }, { "epoch": 43.51724137931034, "grad_norm": 1.6730564832687378, "learning_rate": 2.7533793103448278e-05, "loss": 0.1353, "step": 12620 }, { "epoch": 43.52068965517241, "grad_norm": 0.5794726014137268, "learning_rate": 2.7533333333333336e-05, "loss": 0.1707, "step": 12621 }, { "epoch": 43.52413793103448, "grad_norm": 0.5361167788505554, "learning_rate": 2.753287356321839e-05, "loss": 0.1471, "step": 12622 }, { "epoch": 43.52758620689655, "grad_norm": 0.5526849031448364, "learning_rate": 2.7532413793103447e-05, "loss": 0.1516, "step": 12623 }, { "epoch": 43.53103448275862, "grad_norm": 0.8172004222869873, "learning_rate": 2.753195402298851e-05, "loss": 0.1733, "step": 12624 }, { "epoch": 43.53448275862069, "grad_norm": 0.489565908908844, "learning_rate": 2.7531494252873564e-05, "loss": 0.1533, "step": 12625 }, { "epoch": 43.53793103448276, "grad_norm": 0.5492371916770935, "learning_rate": 2.7531034482758623e-05, "loss": 0.1448, "step": 12626 }, { "epoch": 43.54137931034483, "grad_norm": 0.6569371819496155, "learning_rate": 2.7530574712643678e-05, "loss": 0.1578, "step": 12627 }, { "epoch": 43.5448275862069, "grad_norm": 1.1204218864440918, "learning_rate": 2.7530114942528737e-05, "loss": 0.116, "step": 12628 }, { "epoch": 43.54827586206896, "grad_norm": 2.177384614944458, "learning_rate": 2.7529655172413796e-05, "loss": 0.124, "step": 12629 }, { "epoch": 43.55172413793103, "grad_norm": 0.6176684498786926, "learning_rate": 2.752919540229885e-05, "loss": 0.1226, "step": 12630 }, { "epoch": 43.5551724137931, "grad_norm": 0.6527654528617859, "learning_rate": 2.752873563218391e-05, "loss": 0.112, "step": 12631 }, { "epoch": 43.55862068965517, "grad_norm": 0.5353135466575623, "learning_rate": 2.7528275862068968e-05, "loss": 0.1057, "step": 12632 }, { "epoch": 43.56206896551724, "grad_norm": 0.9045434594154358, "learning_rate": 2.7527816091954024e-05, "loss": 0.1197, "step": 12633 }, { "epoch": 43.56551724137931, "grad_norm": 1.1878589391708374, "learning_rate": 2.7527356321839082e-05, "loss": 0.0937, "step": 12634 }, { "epoch": 43.56896551724138, "grad_norm": 0.6404416561126709, "learning_rate": 2.7526896551724137e-05, "loss": 0.1342, "step": 12635 }, { "epoch": 43.57241379310345, "grad_norm": 1.0489988327026367, "learning_rate": 2.7526436781609196e-05, "loss": 0.1088, "step": 12636 }, { "epoch": 43.57586206896552, "grad_norm": 0.6268208026885986, "learning_rate": 2.7525977011494255e-05, "loss": 0.1085, "step": 12637 }, { "epoch": 43.57931034482758, "grad_norm": 0.7660331130027771, "learning_rate": 2.752551724137931e-05, "loss": 0.0849, "step": 12638 }, { "epoch": 43.58275862068965, "grad_norm": 0.9891315698623657, "learning_rate": 2.752505747126437e-05, "loss": 0.1077, "step": 12639 }, { "epoch": 43.58620689655172, "grad_norm": 0.8228360414505005, "learning_rate": 2.7524597701149428e-05, "loss": 0.0843, "step": 12640 }, { "epoch": 43.58965517241379, "grad_norm": 1.4174340963363647, "learning_rate": 2.7524137931034483e-05, "loss": 0.0849, "step": 12641 }, { "epoch": 43.59310344827586, "grad_norm": 0.636838436126709, "learning_rate": 2.752367816091954e-05, "loss": 0.0878, "step": 12642 }, { "epoch": 43.59655172413793, "grad_norm": 0.5948421359062195, "learning_rate": 2.7523218390804597e-05, "loss": 0.0651, "step": 12643 }, { "epoch": 43.6, "grad_norm": 1.2060868740081787, "learning_rate": 2.7522758620689655e-05, "loss": 0.1034, "step": 12644 }, { "epoch": 43.60344827586207, "grad_norm": 5.071809768676758, "learning_rate": 2.7522298850574714e-05, "loss": 0.1171, "step": 12645 }, { "epoch": 43.60689655172414, "grad_norm": 0.6568869948387146, "learning_rate": 2.752183908045977e-05, "loss": 0.1936, "step": 12646 }, { "epoch": 43.610344827586204, "grad_norm": 1.3267329931259155, "learning_rate": 2.7521379310344828e-05, "loss": 0.1519, "step": 12647 }, { "epoch": 43.61379310344827, "grad_norm": 0.48198169469833374, "learning_rate": 2.7520919540229883e-05, "loss": 0.1652, "step": 12648 }, { "epoch": 43.61724137931034, "grad_norm": 0.6367897987365723, "learning_rate": 2.7520459770114945e-05, "loss": 0.1637, "step": 12649 }, { "epoch": 43.62068965517241, "grad_norm": 0.5810509920120239, "learning_rate": 2.752e-05, "loss": 0.1454, "step": 12650 }, { "epoch": 43.62413793103448, "grad_norm": 0.5677583813667297, "learning_rate": 2.7519540229885056e-05, "loss": 0.1315, "step": 12651 }, { "epoch": 43.62758620689655, "grad_norm": 0.521271288394928, "learning_rate": 2.7519080459770115e-05, "loss": 0.1337, "step": 12652 }, { "epoch": 43.63103448275862, "grad_norm": 0.5840628743171692, "learning_rate": 2.7518620689655173e-05, "loss": 0.1335, "step": 12653 }, { "epoch": 43.63448275862069, "grad_norm": 0.7567822933197021, "learning_rate": 2.7518160919540232e-05, "loss": 0.1191, "step": 12654 }, { "epoch": 43.63793103448276, "grad_norm": 0.6866278052330017, "learning_rate": 2.7517701149425287e-05, "loss": 0.1245, "step": 12655 }, { "epoch": 43.641379310344824, "grad_norm": 0.9651748538017273, "learning_rate": 2.7517241379310343e-05, "loss": 0.1213, "step": 12656 }, { "epoch": 43.644827586206894, "grad_norm": 0.5732473731040955, "learning_rate": 2.7516781609195405e-05, "loss": 0.11, "step": 12657 }, { "epoch": 43.648275862068964, "grad_norm": 0.6179454326629639, "learning_rate": 2.751632183908046e-05, "loss": 0.0967, "step": 12658 }, { "epoch": 43.65172413793103, "grad_norm": 0.5903409719467163, "learning_rate": 2.751586206896552e-05, "loss": 0.1132, "step": 12659 }, { "epoch": 43.6551724137931, "grad_norm": 0.6995646357536316, "learning_rate": 2.7515402298850574e-05, "loss": 0.1095, "step": 12660 }, { "epoch": 43.65862068965517, "grad_norm": 0.6244361996650696, "learning_rate": 2.7514942528735633e-05, "loss": 0.1054, "step": 12661 }, { "epoch": 43.66206896551724, "grad_norm": 0.7344852685928345, "learning_rate": 2.751448275862069e-05, "loss": 0.1067, "step": 12662 }, { "epoch": 43.66551724137931, "grad_norm": 0.6341478824615479, "learning_rate": 2.7514022988505747e-05, "loss": 0.0843, "step": 12663 }, { "epoch": 43.66896551724138, "grad_norm": 1.7624411582946777, "learning_rate": 2.7513563218390802e-05, "loss": 0.0983, "step": 12664 }, { "epoch": 43.672413793103445, "grad_norm": 1.4765434265136719, "learning_rate": 2.7513103448275864e-05, "loss": 0.1061, "step": 12665 }, { "epoch": 43.675862068965515, "grad_norm": 0.6664218902587891, "learning_rate": 2.751264367816092e-05, "loss": 0.0827, "step": 12666 }, { "epoch": 43.679310344827584, "grad_norm": 0.6197667121887207, "learning_rate": 2.7512183908045978e-05, "loss": 0.0784, "step": 12667 }, { "epoch": 43.682758620689654, "grad_norm": 0.7269516587257385, "learning_rate": 2.7511724137931033e-05, "loss": 0.0848, "step": 12668 }, { "epoch": 43.686206896551724, "grad_norm": 2.0603678226470947, "learning_rate": 2.7511264367816092e-05, "loss": 0.0837, "step": 12669 }, { "epoch": 43.689655172413794, "grad_norm": 1.1367883682250977, "learning_rate": 2.751080459770115e-05, "loss": 0.1268, "step": 12670 }, { "epoch": 43.69310344827586, "grad_norm": 0.7604249715805054, "learning_rate": 2.7510344827586206e-05, "loss": 0.1718, "step": 12671 }, { "epoch": 43.69655172413793, "grad_norm": 0.5087053775787354, "learning_rate": 2.7509885057471265e-05, "loss": 0.1422, "step": 12672 }, { "epoch": 43.7, "grad_norm": 0.7183519005775452, "learning_rate": 2.7509425287356323e-05, "loss": 0.1525, "step": 12673 }, { "epoch": 43.703448275862065, "grad_norm": 1.149794340133667, "learning_rate": 2.750896551724138e-05, "loss": 0.1341, "step": 12674 }, { "epoch": 43.706896551724135, "grad_norm": 0.5702446699142456, "learning_rate": 2.7508505747126437e-05, "loss": 0.1253, "step": 12675 }, { "epoch": 43.710344827586205, "grad_norm": 0.6721222400665283, "learning_rate": 2.7508045977011493e-05, "loss": 0.1323, "step": 12676 }, { "epoch": 43.713793103448275, "grad_norm": 0.6639436483383179, "learning_rate": 2.7507586206896555e-05, "loss": 0.147, "step": 12677 }, { "epoch": 43.717241379310344, "grad_norm": 0.8623786568641663, "learning_rate": 2.750712643678161e-05, "loss": 0.1256, "step": 12678 }, { "epoch": 43.720689655172414, "grad_norm": 0.6667209267616272, "learning_rate": 2.7506666666666665e-05, "loss": 0.1083, "step": 12679 }, { "epoch": 43.724137931034484, "grad_norm": 0.4725092053413391, "learning_rate": 2.7506206896551724e-05, "loss": 0.106, "step": 12680 }, { "epoch": 43.727586206896554, "grad_norm": 1.6171987056732178, "learning_rate": 2.7505747126436783e-05, "loss": 0.1275, "step": 12681 }, { "epoch": 43.73103448275862, "grad_norm": 0.5786203742027283, "learning_rate": 2.750528735632184e-05, "loss": 0.1307, "step": 12682 }, { "epoch": 43.734482758620686, "grad_norm": 0.5293341279029846, "learning_rate": 2.7504827586206897e-05, "loss": 0.1065, "step": 12683 }, { "epoch": 43.737931034482756, "grad_norm": 0.8547682166099548, "learning_rate": 2.7504367816091952e-05, "loss": 0.1122, "step": 12684 }, { "epoch": 43.741379310344826, "grad_norm": 0.9272525310516357, "learning_rate": 2.7503908045977014e-05, "loss": 0.115, "step": 12685 }, { "epoch": 43.744827586206895, "grad_norm": 0.7923774719238281, "learning_rate": 2.750344827586207e-05, "loss": 0.0981, "step": 12686 }, { "epoch": 43.748275862068965, "grad_norm": 0.8295458555221558, "learning_rate": 2.7502988505747128e-05, "loss": 0.1113, "step": 12687 }, { "epoch": 43.751724137931035, "grad_norm": 1.1554652452468872, "learning_rate": 2.7502528735632183e-05, "loss": 0.0991, "step": 12688 }, { "epoch": 43.755172413793105, "grad_norm": 0.6961029767990112, "learning_rate": 2.7502068965517242e-05, "loss": 0.0964, "step": 12689 }, { "epoch": 43.758620689655174, "grad_norm": 0.7169547080993652, "learning_rate": 2.75016091954023e-05, "loss": 0.0962, "step": 12690 }, { "epoch": 43.762068965517244, "grad_norm": 2.0995330810546875, "learning_rate": 2.7501149425287356e-05, "loss": 0.1058, "step": 12691 }, { "epoch": 43.765517241379314, "grad_norm": 0.9578233957290649, "learning_rate": 2.7500689655172415e-05, "loss": 0.076, "step": 12692 }, { "epoch": 43.76896551724138, "grad_norm": 1.001589298248291, "learning_rate": 2.7500229885057473e-05, "loss": 0.085, "step": 12693 }, { "epoch": 43.772413793103446, "grad_norm": 1.0374611616134644, "learning_rate": 2.749977011494253e-05, "loss": 0.0979, "step": 12694 }, { "epoch": 43.775862068965516, "grad_norm": 1.2142345905303955, "learning_rate": 2.7499310344827587e-05, "loss": 0.1082, "step": 12695 }, { "epoch": 43.779310344827586, "grad_norm": 0.7881163954734802, "learning_rate": 2.7498850574712643e-05, "loss": 0.2102, "step": 12696 }, { "epoch": 43.782758620689656, "grad_norm": 0.5654337406158447, "learning_rate": 2.74983908045977e-05, "loss": 0.1672, "step": 12697 }, { "epoch": 43.786206896551725, "grad_norm": 0.5734061002731323, "learning_rate": 2.749793103448276e-05, "loss": 0.1657, "step": 12698 }, { "epoch": 43.789655172413795, "grad_norm": 1.4445381164550781, "learning_rate": 2.7497471264367815e-05, "loss": 0.1573, "step": 12699 }, { "epoch": 43.793103448275865, "grad_norm": 0.705150842666626, "learning_rate": 2.7497011494252874e-05, "loss": 0.1286, "step": 12700 }, { "epoch": 43.796551724137935, "grad_norm": 0.6812707781791687, "learning_rate": 2.7496551724137933e-05, "loss": 0.1303, "step": 12701 }, { "epoch": 43.8, "grad_norm": 0.5677488446235657, "learning_rate": 2.7496091954022988e-05, "loss": 0.1586, "step": 12702 }, { "epoch": 43.80344827586207, "grad_norm": 1.0819098949432373, "learning_rate": 2.7495632183908047e-05, "loss": 0.1434, "step": 12703 }, { "epoch": 43.80689655172414, "grad_norm": 1.2543803453445435, "learning_rate": 2.7495172413793102e-05, "loss": 0.1322, "step": 12704 }, { "epoch": 43.810344827586206, "grad_norm": 0.656292736530304, "learning_rate": 2.7494712643678164e-05, "loss": 0.1212, "step": 12705 }, { "epoch": 43.813793103448276, "grad_norm": 0.8442496657371521, "learning_rate": 2.749425287356322e-05, "loss": 0.1121, "step": 12706 }, { "epoch": 43.817241379310346, "grad_norm": 1.370567798614502, "learning_rate": 2.7493793103448275e-05, "loss": 0.1323, "step": 12707 }, { "epoch": 43.820689655172416, "grad_norm": 0.5810983777046204, "learning_rate": 2.7493333333333333e-05, "loss": 0.1216, "step": 12708 }, { "epoch": 43.824137931034485, "grad_norm": 0.8026648163795471, "learning_rate": 2.7492873563218392e-05, "loss": 0.1208, "step": 12709 }, { "epoch": 43.827586206896555, "grad_norm": 0.6950090527534485, "learning_rate": 2.749241379310345e-05, "loss": 0.1064, "step": 12710 }, { "epoch": 43.83103448275862, "grad_norm": 0.5690739154815674, "learning_rate": 2.7491954022988506e-05, "loss": 0.1124, "step": 12711 }, { "epoch": 43.83448275862069, "grad_norm": 0.9042580127716064, "learning_rate": 2.749149425287356e-05, "loss": 0.1146, "step": 12712 }, { "epoch": 43.83793103448276, "grad_norm": 0.6096228957176208, "learning_rate": 2.7491034482758623e-05, "loss": 0.0803, "step": 12713 }, { "epoch": 43.84137931034483, "grad_norm": 0.9927943348884583, "learning_rate": 2.749057471264368e-05, "loss": 0.112, "step": 12714 }, { "epoch": 43.8448275862069, "grad_norm": 0.6041661500930786, "learning_rate": 2.7490114942528737e-05, "loss": 0.0807, "step": 12715 }, { "epoch": 43.84827586206897, "grad_norm": 0.7394433617591858, "learning_rate": 2.7489655172413793e-05, "loss": 0.1066, "step": 12716 }, { "epoch": 43.851724137931036, "grad_norm": 0.706717312335968, "learning_rate": 2.748919540229885e-05, "loss": 0.0814, "step": 12717 }, { "epoch": 43.855172413793106, "grad_norm": 0.8183538913726807, "learning_rate": 2.748873563218391e-05, "loss": 0.0926, "step": 12718 }, { "epoch": 43.858620689655176, "grad_norm": 3.2591516971588135, "learning_rate": 2.7488275862068965e-05, "loss": 0.0887, "step": 12719 }, { "epoch": 43.86206896551724, "grad_norm": 1.303515076637268, "learning_rate": 2.7487816091954024e-05, "loss": 0.1234, "step": 12720 }, { "epoch": 43.86551724137931, "grad_norm": 0.7212747931480408, "learning_rate": 2.7487356321839083e-05, "loss": 0.2295, "step": 12721 }, { "epoch": 43.86896551724138, "grad_norm": 0.5769283771514893, "learning_rate": 2.7486896551724138e-05, "loss": 0.1653, "step": 12722 }, { "epoch": 43.87241379310345, "grad_norm": 0.5504125952720642, "learning_rate": 2.7486436781609197e-05, "loss": 0.1477, "step": 12723 }, { "epoch": 43.87586206896552, "grad_norm": 0.871793806552887, "learning_rate": 2.7485977011494252e-05, "loss": 0.1411, "step": 12724 }, { "epoch": 43.87931034482759, "grad_norm": 0.7099097371101379, "learning_rate": 2.748551724137931e-05, "loss": 0.1566, "step": 12725 }, { "epoch": 43.88275862068966, "grad_norm": 0.5363399386405945, "learning_rate": 2.748505747126437e-05, "loss": 0.1219, "step": 12726 }, { "epoch": 43.88620689655173, "grad_norm": 0.7953901290893555, "learning_rate": 2.7484597701149425e-05, "loss": 0.1484, "step": 12727 }, { "epoch": 43.889655172413796, "grad_norm": 0.7760125398635864, "learning_rate": 2.7484137931034483e-05, "loss": 0.1391, "step": 12728 }, { "epoch": 43.89310344827586, "grad_norm": 0.6235454082489014, "learning_rate": 2.7483678160919542e-05, "loss": 0.118, "step": 12729 }, { "epoch": 43.89655172413793, "grad_norm": 0.5070509314537048, "learning_rate": 2.7483218390804597e-05, "loss": 0.1182, "step": 12730 }, { "epoch": 43.9, "grad_norm": 0.5679940581321716, "learning_rate": 2.7482758620689656e-05, "loss": 0.1163, "step": 12731 }, { "epoch": 43.90344827586207, "grad_norm": 0.5891043543815613, "learning_rate": 2.748229885057471e-05, "loss": 0.1235, "step": 12732 }, { "epoch": 43.90689655172414, "grad_norm": 0.5352034568786621, "learning_rate": 2.7481839080459773e-05, "loss": 0.11, "step": 12733 }, { "epoch": 43.91034482758621, "grad_norm": 0.6636918783187866, "learning_rate": 2.748137931034483e-05, "loss": 0.1007, "step": 12734 }, { "epoch": 43.91379310344828, "grad_norm": 0.5928987860679626, "learning_rate": 2.7480919540229884e-05, "loss": 0.1205, "step": 12735 }, { "epoch": 43.91724137931035, "grad_norm": 0.6321230530738831, "learning_rate": 2.7480459770114942e-05, "loss": 0.1054, "step": 12736 }, { "epoch": 43.92068965517242, "grad_norm": 0.9203783869743347, "learning_rate": 2.748e-05, "loss": 0.0922, "step": 12737 }, { "epoch": 43.92413793103448, "grad_norm": 0.7751172184944153, "learning_rate": 2.747954022988506e-05, "loss": 0.0997, "step": 12738 }, { "epoch": 43.92758620689655, "grad_norm": 1.2730127573013306, "learning_rate": 2.7479080459770115e-05, "loss": 0.0883, "step": 12739 }, { "epoch": 43.93103448275862, "grad_norm": 0.5977160334587097, "learning_rate": 2.747862068965517e-05, "loss": 0.0847, "step": 12740 }, { "epoch": 43.93448275862069, "grad_norm": 0.8022189140319824, "learning_rate": 2.7478160919540233e-05, "loss": 0.1026, "step": 12741 }, { "epoch": 43.93793103448276, "grad_norm": 1.575539469718933, "learning_rate": 2.7477701149425288e-05, "loss": 0.0781, "step": 12742 }, { "epoch": 43.94137931034483, "grad_norm": 0.8872755765914917, "learning_rate": 2.7477241379310346e-05, "loss": 0.0827, "step": 12743 }, { "epoch": 43.9448275862069, "grad_norm": 0.7763521671295166, "learning_rate": 2.7476781609195402e-05, "loss": 0.0853, "step": 12744 }, { "epoch": 43.94827586206897, "grad_norm": 1.3577467203140259, "learning_rate": 2.747632183908046e-05, "loss": 0.1221, "step": 12745 }, { "epoch": 43.95172413793104, "grad_norm": 0.5807327628135681, "learning_rate": 2.747586206896552e-05, "loss": 0.2095, "step": 12746 }, { "epoch": 43.9551724137931, "grad_norm": 0.5303313136100769, "learning_rate": 2.7475402298850574e-05, "loss": 0.1563, "step": 12747 }, { "epoch": 43.95862068965517, "grad_norm": 0.6846400499343872, "learning_rate": 2.7474942528735633e-05, "loss": 0.1446, "step": 12748 }, { "epoch": 43.96206896551724, "grad_norm": 0.5940874218940735, "learning_rate": 2.7474482758620692e-05, "loss": 0.1374, "step": 12749 }, { "epoch": 43.96551724137931, "grad_norm": 1.1918003559112549, "learning_rate": 2.7474022988505747e-05, "loss": 0.1239, "step": 12750 }, { "epoch": 43.96896551724138, "grad_norm": 0.7404505610466003, "learning_rate": 2.7473563218390806e-05, "loss": 0.1287, "step": 12751 }, { "epoch": 43.97241379310345, "grad_norm": 1.0278677940368652, "learning_rate": 2.747310344827586e-05, "loss": 0.1296, "step": 12752 }, { "epoch": 43.97586206896552, "grad_norm": 1.0402873754501343, "learning_rate": 2.747264367816092e-05, "loss": 0.1171, "step": 12753 }, { "epoch": 43.97931034482759, "grad_norm": 0.6009992361068726, "learning_rate": 2.747218390804598e-05, "loss": 0.1123, "step": 12754 }, { "epoch": 43.98275862068966, "grad_norm": 0.6563612222671509, "learning_rate": 2.7471724137931034e-05, "loss": 0.0789, "step": 12755 }, { "epoch": 43.98620689655172, "grad_norm": 0.8515424728393555, "learning_rate": 2.7471264367816092e-05, "loss": 0.1016, "step": 12756 }, { "epoch": 43.98965517241379, "grad_norm": 0.8516396880149841, "learning_rate": 2.747080459770115e-05, "loss": 0.0906, "step": 12757 }, { "epoch": 43.99310344827586, "grad_norm": 1.2431817054748535, "learning_rate": 2.7470344827586206e-05, "loss": 0.1076, "step": 12758 }, { "epoch": 43.99655172413793, "grad_norm": 0.7657280564308167, "learning_rate": 2.7469885057471265e-05, "loss": 0.0663, "step": 12759 }, { "epoch": 44.0, "grad_norm": 0.9824294447898865, "learning_rate": 2.746942528735632e-05, "loss": 0.1447, "step": 12760 }, { "epoch": 44.00344827586207, "grad_norm": 0.8438288569450378, "learning_rate": 2.7468965517241382e-05, "loss": 0.1708, "step": 12761 }, { "epoch": 44.00689655172414, "grad_norm": 0.5374788045883179, "learning_rate": 2.7468505747126438e-05, "loss": 0.1492, "step": 12762 }, { "epoch": 44.01034482758621, "grad_norm": 0.7314534783363342, "learning_rate": 2.7468045977011493e-05, "loss": 0.1535, "step": 12763 }, { "epoch": 44.01379310344828, "grad_norm": 0.519109308719635, "learning_rate": 2.7467586206896552e-05, "loss": 0.1283, "step": 12764 }, { "epoch": 44.01724137931034, "grad_norm": 0.6606349945068359, "learning_rate": 2.746712643678161e-05, "loss": 0.1281, "step": 12765 }, { "epoch": 44.02068965517241, "grad_norm": 0.5877281427383423, "learning_rate": 2.746666666666667e-05, "loss": 0.1369, "step": 12766 }, { "epoch": 44.02413793103448, "grad_norm": 0.7185536026954651, "learning_rate": 2.7466206896551724e-05, "loss": 0.1427, "step": 12767 }, { "epoch": 44.02758620689655, "grad_norm": 1.0523806810379028, "learning_rate": 2.746574712643678e-05, "loss": 0.113, "step": 12768 }, { "epoch": 44.03103448275862, "grad_norm": 0.6372756361961365, "learning_rate": 2.7465287356321842e-05, "loss": 0.1132, "step": 12769 }, { "epoch": 44.03448275862069, "grad_norm": 0.6185532212257385, "learning_rate": 2.7464827586206897e-05, "loss": 0.1087, "step": 12770 }, { "epoch": 44.03793103448276, "grad_norm": 1.9470874071121216, "learning_rate": 2.7464367816091956e-05, "loss": 0.1052, "step": 12771 }, { "epoch": 44.04137931034483, "grad_norm": 0.6730583906173706, "learning_rate": 2.746390804597701e-05, "loss": 0.1176, "step": 12772 }, { "epoch": 44.0448275862069, "grad_norm": 0.6873778104782104, "learning_rate": 2.746344827586207e-05, "loss": 0.0787, "step": 12773 }, { "epoch": 44.04827586206896, "grad_norm": 0.5361456274986267, "learning_rate": 2.746298850574713e-05, "loss": 0.0958, "step": 12774 }, { "epoch": 44.05172413793103, "grad_norm": 0.583693265914917, "learning_rate": 2.7462528735632184e-05, "loss": 0.0943, "step": 12775 }, { "epoch": 44.0551724137931, "grad_norm": 0.6122737526893616, "learning_rate": 2.7462068965517242e-05, "loss": 0.1063, "step": 12776 }, { "epoch": 44.05862068965517, "grad_norm": 0.6358963251113892, "learning_rate": 2.74616091954023e-05, "loss": 0.0865, "step": 12777 }, { "epoch": 44.06206896551724, "grad_norm": 5.367896556854248, "learning_rate": 2.7461149425287356e-05, "loss": 0.079, "step": 12778 }, { "epoch": 44.06551724137931, "grad_norm": 0.9069180488586426, "learning_rate": 2.7460689655172415e-05, "loss": 0.0852, "step": 12779 }, { "epoch": 44.06896551724138, "grad_norm": 0.6416721940040588, "learning_rate": 2.746022988505747e-05, "loss": 0.0752, "step": 12780 }, { "epoch": 44.07241379310345, "grad_norm": 0.7743673324584961, "learning_rate": 2.745977011494253e-05, "loss": 0.0717, "step": 12781 }, { "epoch": 44.07586206896552, "grad_norm": 0.9502303004264832, "learning_rate": 2.7459310344827588e-05, "loss": 0.0814, "step": 12782 }, { "epoch": 44.07931034482758, "grad_norm": 0.6872621178627014, "learning_rate": 2.7458850574712643e-05, "loss": 0.0765, "step": 12783 }, { "epoch": 44.08275862068965, "grad_norm": 0.9197741746902466, "learning_rate": 2.74583908045977e-05, "loss": 0.0788, "step": 12784 }, { "epoch": 44.08620689655172, "grad_norm": 1.291980504989624, "learning_rate": 2.745793103448276e-05, "loss": 0.1196, "step": 12785 }, { "epoch": 44.08965517241379, "grad_norm": 1.0311707258224487, "learning_rate": 2.7457471264367816e-05, "loss": 0.1985, "step": 12786 }, { "epoch": 44.09310344827586, "grad_norm": 0.6664935946464539, "learning_rate": 2.7457011494252874e-05, "loss": 0.1492, "step": 12787 }, { "epoch": 44.09655172413793, "grad_norm": 0.6144649386405945, "learning_rate": 2.745655172413793e-05, "loss": 0.1442, "step": 12788 }, { "epoch": 44.1, "grad_norm": 0.7959133982658386, "learning_rate": 2.7456091954022992e-05, "loss": 0.1564, "step": 12789 }, { "epoch": 44.10344827586207, "grad_norm": 0.942143440246582, "learning_rate": 2.7455632183908047e-05, "loss": 0.152, "step": 12790 }, { "epoch": 44.10689655172414, "grad_norm": 0.5548621416091919, "learning_rate": 2.7455172413793102e-05, "loss": 0.1278, "step": 12791 }, { "epoch": 44.110344827586204, "grad_norm": 0.7763934135437012, "learning_rate": 2.745471264367816e-05, "loss": 0.1127, "step": 12792 }, { "epoch": 44.11379310344827, "grad_norm": 0.788783848285675, "learning_rate": 2.745425287356322e-05, "loss": 0.1302, "step": 12793 }, { "epoch": 44.11724137931034, "grad_norm": 0.6636466979980469, "learning_rate": 2.745379310344828e-05, "loss": 0.11, "step": 12794 }, { "epoch": 44.12068965517241, "grad_norm": 0.5682352185249329, "learning_rate": 2.7453333333333334e-05, "loss": 0.105, "step": 12795 }, { "epoch": 44.12413793103448, "grad_norm": 0.8664748072624207, "learning_rate": 2.745287356321839e-05, "loss": 0.112, "step": 12796 }, { "epoch": 44.12758620689655, "grad_norm": 0.6103196144104004, "learning_rate": 2.745241379310345e-05, "loss": 0.1071, "step": 12797 }, { "epoch": 44.13103448275862, "grad_norm": 0.7431607842445374, "learning_rate": 2.7451954022988506e-05, "loss": 0.0987, "step": 12798 }, { "epoch": 44.13448275862069, "grad_norm": 1.143399715423584, "learning_rate": 2.7451494252873565e-05, "loss": 0.1093, "step": 12799 }, { "epoch": 44.13793103448276, "grad_norm": 1.0434521436691284, "learning_rate": 2.745103448275862e-05, "loss": 0.1039, "step": 12800 }, { "epoch": 44.141379310344824, "grad_norm": 0.5553776025772095, "learning_rate": 2.745057471264368e-05, "loss": 0.0853, "step": 12801 }, { "epoch": 44.144827586206894, "grad_norm": 0.7361482381820679, "learning_rate": 2.7450114942528738e-05, "loss": 0.0875, "step": 12802 }, { "epoch": 44.148275862068964, "grad_norm": 0.6892111301422119, "learning_rate": 2.7449655172413793e-05, "loss": 0.0825, "step": 12803 }, { "epoch": 44.15172413793103, "grad_norm": 0.9128405451774597, "learning_rate": 2.744919540229885e-05, "loss": 0.0708, "step": 12804 }, { "epoch": 44.1551724137931, "grad_norm": 0.7980524897575378, "learning_rate": 2.744873563218391e-05, "loss": 0.0788, "step": 12805 }, { "epoch": 44.15862068965517, "grad_norm": 0.5971577167510986, "learning_rate": 2.7448275862068966e-05, "loss": 0.0621, "step": 12806 }, { "epoch": 44.16206896551724, "grad_norm": 0.8546199202537537, "learning_rate": 2.7447816091954024e-05, "loss": 0.0616, "step": 12807 }, { "epoch": 44.16551724137931, "grad_norm": 0.9333012700080872, "learning_rate": 2.744735632183908e-05, "loss": 0.0704, "step": 12808 }, { "epoch": 44.16896551724138, "grad_norm": 1.0265426635742188, "learning_rate": 2.7446896551724138e-05, "loss": 0.0754, "step": 12809 }, { "epoch": 44.172413793103445, "grad_norm": 1.212957739830017, "learning_rate": 2.7446436781609197e-05, "loss": 0.1267, "step": 12810 }, { "epoch": 44.175862068965515, "grad_norm": 1.6879245042800903, "learning_rate": 2.7445977011494252e-05, "loss": 0.188, "step": 12811 }, { "epoch": 44.179310344827584, "grad_norm": 0.6362742185592651, "learning_rate": 2.744551724137931e-05, "loss": 0.1517, "step": 12812 }, { "epoch": 44.182758620689654, "grad_norm": 0.46546030044555664, "learning_rate": 2.744505747126437e-05, "loss": 0.1468, "step": 12813 }, { "epoch": 44.186206896551724, "grad_norm": 0.7396271824836731, "learning_rate": 2.7444597701149425e-05, "loss": 0.1299, "step": 12814 }, { "epoch": 44.189655172413794, "grad_norm": 0.5416039228439331, "learning_rate": 2.7444137931034484e-05, "loss": 0.1272, "step": 12815 }, { "epoch": 44.19310344827586, "grad_norm": 0.7964096069335938, "learning_rate": 2.744367816091954e-05, "loss": 0.1211, "step": 12816 }, { "epoch": 44.19655172413793, "grad_norm": 0.6693609356880188, "learning_rate": 2.74432183908046e-05, "loss": 0.117, "step": 12817 }, { "epoch": 44.2, "grad_norm": 0.927332878112793, "learning_rate": 2.7442758620689656e-05, "loss": 0.1102, "step": 12818 }, { "epoch": 44.203448275862065, "grad_norm": 0.6656819581985474, "learning_rate": 2.744229885057471e-05, "loss": 0.1287, "step": 12819 }, { "epoch": 44.206896551724135, "grad_norm": 0.6493300795555115, "learning_rate": 2.744183908045977e-05, "loss": 0.1077, "step": 12820 }, { "epoch": 44.210344827586205, "grad_norm": 0.7022927403450012, "learning_rate": 2.744137931034483e-05, "loss": 0.1102, "step": 12821 }, { "epoch": 44.213793103448275, "grad_norm": 0.8013306260108948, "learning_rate": 2.7440919540229888e-05, "loss": 0.1241, "step": 12822 }, { "epoch": 44.217241379310344, "grad_norm": 0.6720398664474487, "learning_rate": 2.7440459770114943e-05, "loss": 0.1019, "step": 12823 }, { "epoch": 44.220689655172414, "grad_norm": 1.5783190727233887, "learning_rate": 2.7439999999999998e-05, "loss": 0.107, "step": 12824 }, { "epoch": 44.224137931034484, "grad_norm": 0.5707923173904419, "learning_rate": 2.743954022988506e-05, "loss": 0.1087, "step": 12825 }, { "epoch": 44.227586206896554, "grad_norm": 0.5214501619338989, "learning_rate": 2.7439080459770116e-05, "loss": 0.0831, "step": 12826 }, { "epoch": 44.23103448275862, "grad_norm": 0.6704422235488892, "learning_rate": 2.7438620689655174e-05, "loss": 0.0915, "step": 12827 }, { "epoch": 44.234482758620686, "grad_norm": 0.8678042888641357, "learning_rate": 2.743816091954023e-05, "loss": 0.0832, "step": 12828 }, { "epoch": 44.237931034482756, "grad_norm": 0.7269654870033264, "learning_rate": 2.7437701149425288e-05, "loss": 0.0933, "step": 12829 }, { "epoch": 44.241379310344826, "grad_norm": 1.1443662643432617, "learning_rate": 2.7437241379310347e-05, "loss": 0.0909, "step": 12830 }, { "epoch": 44.244827586206895, "grad_norm": 0.6528841257095337, "learning_rate": 2.7436781609195402e-05, "loss": 0.0712, "step": 12831 }, { "epoch": 44.248275862068965, "grad_norm": 0.9862557649612427, "learning_rate": 2.743632183908046e-05, "loss": 0.0701, "step": 12832 }, { "epoch": 44.251724137931035, "grad_norm": 0.9130429029464722, "learning_rate": 2.743586206896552e-05, "loss": 0.0852, "step": 12833 }, { "epoch": 44.255172413793105, "grad_norm": 1.3875679969787598, "learning_rate": 2.7435402298850575e-05, "loss": 0.0744, "step": 12834 }, { "epoch": 44.258620689655174, "grad_norm": 1.4562031030654907, "learning_rate": 2.7434942528735634e-05, "loss": 0.0998, "step": 12835 }, { "epoch": 44.262068965517244, "grad_norm": 0.8291147947311401, "learning_rate": 2.743448275862069e-05, "loss": 0.1859, "step": 12836 }, { "epoch": 44.265517241379314, "grad_norm": 0.5437877774238586, "learning_rate": 2.743402298850575e-05, "loss": 0.1596, "step": 12837 }, { "epoch": 44.26896551724138, "grad_norm": 0.4591284394264221, "learning_rate": 2.7433563218390806e-05, "loss": 0.1338, "step": 12838 }, { "epoch": 44.272413793103446, "grad_norm": 0.6300811767578125, "learning_rate": 2.743310344827586e-05, "loss": 0.1423, "step": 12839 }, { "epoch": 44.275862068965516, "grad_norm": 1.2403916120529175, "learning_rate": 2.743264367816092e-05, "loss": 0.1522, "step": 12840 }, { "epoch": 44.279310344827586, "grad_norm": 0.6560309529304504, "learning_rate": 2.743218390804598e-05, "loss": 0.1166, "step": 12841 }, { "epoch": 44.282758620689656, "grad_norm": 0.5544917583465576, "learning_rate": 2.7431724137931034e-05, "loss": 0.1218, "step": 12842 }, { "epoch": 44.286206896551725, "grad_norm": 0.4693603813648224, "learning_rate": 2.7431264367816093e-05, "loss": 0.1307, "step": 12843 }, { "epoch": 44.289655172413795, "grad_norm": 0.5700213313102722, "learning_rate": 2.7430804597701148e-05, "loss": 0.112, "step": 12844 }, { "epoch": 44.293103448275865, "grad_norm": 0.6201896071434021, "learning_rate": 2.743034482758621e-05, "loss": 0.0866, "step": 12845 }, { "epoch": 44.296551724137935, "grad_norm": 0.7756028175354004, "learning_rate": 2.7429885057471265e-05, "loss": 0.1023, "step": 12846 }, { "epoch": 44.3, "grad_norm": 1.3132966756820679, "learning_rate": 2.742942528735632e-05, "loss": 0.1039, "step": 12847 }, { "epoch": 44.30344827586207, "grad_norm": 0.808770477771759, "learning_rate": 2.742896551724138e-05, "loss": 0.1243, "step": 12848 }, { "epoch": 44.30689655172414, "grad_norm": 0.6144152283668518, "learning_rate": 2.7428505747126438e-05, "loss": 0.0974, "step": 12849 }, { "epoch": 44.310344827586206, "grad_norm": 0.5993027091026306, "learning_rate": 2.7428045977011497e-05, "loss": 0.0951, "step": 12850 }, { "epoch": 44.313793103448276, "grad_norm": 0.7463039755821228, "learning_rate": 2.7427586206896552e-05, "loss": 0.0824, "step": 12851 }, { "epoch": 44.317241379310346, "grad_norm": 0.7785068154335022, "learning_rate": 2.7427126436781607e-05, "loss": 0.0892, "step": 12852 }, { "epoch": 44.320689655172416, "grad_norm": 0.7882689237594604, "learning_rate": 2.742666666666667e-05, "loss": 0.1109, "step": 12853 }, { "epoch": 44.324137931034485, "grad_norm": 0.5226072072982788, "learning_rate": 2.7426206896551725e-05, "loss": 0.0866, "step": 12854 }, { "epoch": 44.327586206896555, "grad_norm": 1.6891893148422241, "learning_rate": 2.7425747126436783e-05, "loss": 0.0871, "step": 12855 }, { "epoch": 44.33103448275862, "grad_norm": 0.7126477360725403, "learning_rate": 2.742528735632184e-05, "loss": 0.0874, "step": 12856 }, { "epoch": 44.33448275862069, "grad_norm": 0.6554867625236511, "learning_rate": 2.7424827586206897e-05, "loss": 0.0737, "step": 12857 }, { "epoch": 44.33793103448276, "grad_norm": 0.6459823250770569, "learning_rate": 2.7424367816091956e-05, "loss": 0.0751, "step": 12858 }, { "epoch": 44.34137931034483, "grad_norm": 1.1606241464614868, "learning_rate": 2.742390804597701e-05, "loss": 0.0792, "step": 12859 }, { "epoch": 44.3448275862069, "grad_norm": 1.868289589881897, "learning_rate": 2.742344827586207e-05, "loss": 0.1059, "step": 12860 }, { "epoch": 44.34827586206897, "grad_norm": 1.177254557609558, "learning_rate": 2.742298850574713e-05, "loss": 0.1818, "step": 12861 }, { "epoch": 44.351724137931036, "grad_norm": 0.7018643617630005, "learning_rate": 2.7422528735632184e-05, "loss": 0.1326, "step": 12862 }, { "epoch": 44.355172413793106, "grad_norm": 0.5060473680496216, "learning_rate": 2.7422068965517243e-05, "loss": 0.1541, "step": 12863 }, { "epoch": 44.358620689655176, "grad_norm": 0.5180804133415222, "learning_rate": 2.7421609195402298e-05, "loss": 0.1398, "step": 12864 }, { "epoch": 44.36206896551724, "grad_norm": 0.5934118628501892, "learning_rate": 2.742114942528736e-05, "loss": 0.1302, "step": 12865 }, { "epoch": 44.36551724137931, "grad_norm": 0.4815073311328888, "learning_rate": 2.7420689655172415e-05, "loss": 0.1269, "step": 12866 }, { "epoch": 44.36896551724138, "grad_norm": 0.7010154724121094, "learning_rate": 2.742022988505747e-05, "loss": 0.1115, "step": 12867 }, { "epoch": 44.37241379310345, "grad_norm": 0.7796452045440674, "learning_rate": 2.741977011494253e-05, "loss": 0.137, "step": 12868 }, { "epoch": 44.37586206896552, "grad_norm": 1.0009815692901611, "learning_rate": 2.7419310344827588e-05, "loss": 0.1112, "step": 12869 }, { "epoch": 44.37931034482759, "grad_norm": 0.5520039796829224, "learning_rate": 2.7418850574712643e-05, "loss": 0.1054, "step": 12870 }, { "epoch": 44.38275862068966, "grad_norm": 0.5900134444236755, "learning_rate": 2.7418390804597702e-05, "loss": 0.1064, "step": 12871 }, { "epoch": 44.38620689655173, "grad_norm": 0.6187995076179504, "learning_rate": 2.7417931034482757e-05, "loss": 0.1122, "step": 12872 }, { "epoch": 44.389655172413796, "grad_norm": 0.7340213060379028, "learning_rate": 2.741747126436782e-05, "loss": 0.1054, "step": 12873 }, { "epoch": 44.39310344827586, "grad_norm": 0.7422870993614197, "learning_rate": 2.7417011494252875e-05, "loss": 0.0903, "step": 12874 }, { "epoch": 44.39655172413793, "grad_norm": 0.6198467016220093, "learning_rate": 2.741655172413793e-05, "loss": 0.0945, "step": 12875 }, { "epoch": 44.4, "grad_norm": 0.6878183484077454, "learning_rate": 2.741609195402299e-05, "loss": 0.0939, "step": 12876 }, { "epoch": 44.40344827586207, "grad_norm": 0.7305931448936462, "learning_rate": 2.7415632183908047e-05, "loss": 0.1053, "step": 12877 }, { "epoch": 44.40689655172414, "grad_norm": 0.7383065819740295, "learning_rate": 2.7415172413793106e-05, "loss": 0.0745, "step": 12878 }, { "epoch": 44.41034482758621, "grad_norm": 0.6296085715293884, "learning_rate": 2.741471264367816e-05, "loss": 0.0858, "step": 12879 }, { "epoch": 44.41379310344828, "grad_norm": 1.0152335166931152, "learning_rate": 2.7414252873563217e-05, "loss": 0.0715, "step": 12880 }, { "epoch": 44.41724137931035, "grad_norm": 0.7705395221710205, "learning_rate": 2.741379310344828e-05, "loss": 0.1006, "step": 12881 }, { "epoch": 44.42068965517242, "grad_norm": 0.5810820460319519, "learning_rate": 2.7413333333333334e-05, "loss": 0.0764, "step": 12882 }, { "epoch": 44.42413793103448, "grad_norm": 0.6956206560134888, "learning_rate": 2.7412873563218393e-05, "loss": 0.0729, "step": 12883 }, { "epoch": 44.42758620689655, "grad_norm": 0.7959281206130981, "learning_rate": 2.7412413793103448e-05, "loss": 0.082, "step": 12884 }, { "epoch": 44.43103448275862, "grad_norm": 1.7917271852493286, "learning_rate": 2.7411954022988507e-05, "loss": 0.0968, "step": 12885 }, { "epoch": 44.43448275862069, "grad_norm": 0.6762353181838989, "learning_rate": 2.7411494252873565e-05, "loss": 0.1973, "step": 12886 }, { "epoch": 44.43793103448276, "grad_norm": 0.6441359519958496, "learning_rate": 2.741103448275862e-05, "loss": 0.1565, "step": 12887 }, { "epoch": 44.44137931034483, "grad_norm": 0.6441949605941772, "learning_rate": 2.741057471264368e-05, "loss": 0.1448, "step": 12888 }, { "epoch": 44.4448275862069, "grad_norm": 0.6458320617675781, "learning_rate": 2.7410114942528738e-05, "loss": 0.1403, "step": 12889 }, { "epoch": 44.44827586206897, "grad_norm": 0.46595460176467896, "learning_rate": 2.7409655172413793e-05, "loss": 0.1307, "step": 12890 }, { "epoch": 44.45172413793104, "grad_norm": 0.6309559345245361, "learning_rate": 2.7409195402298852e-05, "loss": 0.1428, "step": 12891 }, { "epoch": 44.4551724137931, "grad_norm": 0.5914767980575562, "learning_rate": 2.7408735632183907e-05, "loss": 0.1201, "step": 12892 }, { "epoch": 44.45862068965517, "grad_norm": 0.4835461676120758, "learning_rate": 2.740827586206897e-05, "loss": 0.0988, "step": 12893 }, { "epoch": 44.46206896551724, "grad_norm": 0.5617063045501709, "learning_rate": 2.7407816091954025e-05, "loss": 0.105, "step": 12894 }, { "epoch": 44.46551724137931, "grad_norm": 0.6016494631767273, "learning_rate": 2.740735632183908e-05, "loss": 0.104, "step": 12895 }, { "epoch": 44.46896551724138, "grad_norm": 0.5732749700546265, "learning_rate": 2.740689655172414e-05, "loss": 0.1124, "step": 12896 }, { "epoch": 44.47241379310345, "grad_norm": 0.5443530678749084, "learning_rate": 2.7406436781609197e-05, "loss": 0.1076, "step": 12897 }, { "epoch": 44.47586206896552, "grad_norm": 0.6539928317070007, "learning_rate": 2.7405977011494253e-05, "loss": 0.1001, "step": 12898 }, { "epoch": 44.47931034482759, "grad_norm": 0.6227372288703918, "learning_rate": 2.740551724137931e-05, "loss": 0.098, "step": 12899 }, { "epoch": 44.48275862068966, "grad_norm": 0.5072982907295227, "learning_rate": 2.7405057471264367e-05, "loss": 0.0944, "step": 12900 }, { "epoch": 44.48620689655172, "grad_norm": 0.7172431349754333, "learning_rate": 2.740459770114943e-05, "loss": 0.0943, "step": 12901 }, { "epoch": 44.48965517241379, "grad_norm": 0.5216692090034485, "learning_rate": 2.7404137931034484e-05, "loss": 0.085, "step": 12902 }, { "epoch": 44.49310344827586, "grad_norm": 0.6288663744926453, "learning_rate": 2.740367816091954e-05, "loss": 0.0932, "step": 12903 }, { "epoch": 44.49655172413793, "grad_norm": 0.7304041385650635, "learning_rate": 2.7403218390804598e-05, "loss": 0.0915, "step": 12904 }, { "epoch": 44.5, "grad_norm": 0.5975350141525269, "learning_rate": 2.7402758620689657e-05, "loss": 0.0785, "step": 12905 }, { "epoch": 44.50344827586207, "grad_norm": 0.7968757748603821, "learning_rate": 2.7402298850574715e-05, "loss": 0.0871, "step": 12906 }, { "epoch": 44.50689655172414, "grad_norm": 1.274003505706787, "learning_rate": 2.740183908045977e-05, "loss": 0.0816, "step": 12907 }, { "epoch": 44.51034482758621, "grad_norm": 1.1151645183563232, "learning_rate": 2.7401379310344826e-05, "loss": 0.0698, "step": 12908 }, { "epoch": 44.51379310344828, "grad_norm": 0.7485782504081726, "learning_rate": 2.7400919540229888e-05, "loss": 0.0685, "step": 12909 }, { "epoch": 44.51724137931034, "grad_norm": 1.3525348901748657, "learning_rate": 2.7400459770114943e-05, "loss": 0.1015, "step": 12910 }, { "epoch": 44.52068965517241, "grad_norm": 0.5467289090156555, "learning_rate": 2.7400000000000002e-05, "loss": 0.1893, "step": 12911 }, { "epoch": 44.52413793103448, "grad_norm": 1.2052674293518066, "learning_rate": 2.7399540229885057e-05, "loss": 0.1616, "step": 12912 }, { "epoch": 44.52758620689655, "grad_norm": 0.5606614947319031, "learning_rate": 2.7399080459770116e-05, "loss": 0.1743, "step": 12913 }, { "epoch": 44.53103448275862, "grad_norm": 0.5303100943565369, "learning_rate": 2.7398620689655175e-05, "loss": 0.14, "step": 12914 }, { "epoch": 44.53448275862069, "grad_norm": 0.4392830729484558, "learning_rate": 2.739816091954023e-05, "loss": 0.1358, "step": 12915 }, { "epoch": 44.53793103448276, "grad_norm": 0.9829938411712646, "learning_rate": 2.739770114942529e-05, "loss": 0.1292, "step": 12916 }, { "epoch": 44.54137931034483, "grad_norm": 0.5151654481887817, "learning_rate": 2.7397241379310347e-05, "loss": 0.1291, "step": 12917 }, { "epoch": 44.5448275862069, "grad_norm": 0.5288822650909424, "learning_rate": 2.7396781609195403e-05, "loss": 0.1096, "step": 12918 }, { "epoch": 44.54827586206896, "grad_norm": 0.48591047525405884, "learning_rate": 2.739632183908046e-05, "loss": 0.104, "step": 12919 }, { "epoch": 44.55172413793103, "grad_norm": 0.5747652053833008, "learning_rate": 2.7395862068965517e-05, "loss": 0.1047, "step": 12920 }, { "epoch": 44.5551724137931, "grad_norm": 0.8740023374557495, "learning_rate": 2.739540229885058e-05, "loss": 0.1083, "step": 12921 }, { "epoch": 44.55862068965517, "grad_norm": 0.5613550543785095, "learning_rate": 2.7394942528735634e-05, "loss": 0.0938, "step": 12922 }, { "epoch": 44.56206896551724, "grad_norm": 0.6485486626625061, "learning_rate": 2.739448275862069e-05, "loss": 0.0973, "step": 12923 }, { "epoch": 44.56551724137931, "grad_norm": 0.5630452632904053, "learning_rate": 2.7394022988505748e-05, "loss": 0.0942, "step": 12924 }, { "epoch": 44.56896551724138, "grad_norm": 0.6455623507499695, "learning_rate": 2.7393563218390807e-05, "loss": 0.1052, "step": 12925 }, { "epoch": 44.57241379310345, "grad_norm": 0.6223017573356628, "learning_rate": 2.7393103448275865e-05, "loss": 0.0985, "step": 12926 }, { "epoch": 44.57586206896552, "grad_norm": 0.9660621881484985, "learning_rate": 2.739264367816092e-05, "loss": 0.0783, "step": 12927 }, { "epoch": 44.57931034482758, "grad_norm": 0.5174828767776489, "learning_rate": 2.7392183908045976e-05, "loss": 0.0813, "step": 12928 }, { "epoch": 44.58275862068965, "grad_norm": 0.6216137409210205, "learning_rate": 2.7391724137931038e-05, "loss": 0.0751, "step": 12929 }, { "epoch": 44.58620689655172, "grad_norm": 2.4090776443481445, "learning_rate": 2.7391264367816093e-05, "loss": 0.0798, "step": 12930 }, { "epoch": 44.58965517241379, "grad_norm": 0.6740267872810364, "learning_rate": 2.739080459770115e-05, "loss": 0.0881, "step": 12931 }, { "epoch": 44.59310344827586, "grad_norm": 0.7763713598251343, "learning_rate": 2.7390344827586207e-05, "loss": 0.056, "step": 12932 }, { "epoch": 44.59655172413793, "grad_norm": 0.8033932447433472, "learning_rate": 2.7389885057471266e-05, "loss": 0.0799, "step": 12933 }, { "epoch": 44.6, "grad_norm": 0.8073887228965759, "learning_rate": 2.7389425287356325e-05, "loss": 0.0765, "step": 12934 }, { "epoch": 44.60344827586207, "grad_norm": 1.721384048461914, "learning_rate": 2.738896551724138e-05, "loss": 0.0986, "step": 12935 }, { "epoch": 44.60689655172414, "grad_norm": 0.6399405002593994, "learning_rate": 2.7388505747126435e-05, "loss": 0.1917, "step": 12936 }, { "epoch": 44.610344827586204, "grad_norm": 0.5761078000068665, "learning_rate": 2.7388045977011497e-05, "loss": 0.1519, "step": 12937 }, { "epoch": 44.61379310344827, "grad_norm": 0.6103179454803467, "learning_rate": 2.7387586206896552e-05, "loss": 0.1263, "step": 12938 }, { "epoch": 44.61724137931034, "grad_norm": 0.7387210130691528, "learning_rate": 2.738712643678161e-05, "loss": 0.1281, "step": 12939 }, { "epoch": 44.62068965517241, "grad_norm": 1.9058420658111572, "learning_rate": 2.7386666666666666e-05, "loss": 0.1277, "step": 12940 }, { "epoch": 44.62413793103448, "grad_norm": 0.5051743388175964, "learning_rate": 2.7386206896551725e-05, "loss": 0.1294, "step": 12941 }, { "epoch": 44.62758620689655, "grad_norm": 0.5540062785148621, "learning_rate": 2.7385747126436784e-05, "loss": 0.1301, "step": 12942 }, { "epoch": 44.63103448275862, "grad_norm": 0.49864962697029114, "learning_rate": 2.738528735632184e-05, "loss": 0.1258, "step": 12943 }, { "epoch": 44.63448275862069, "grad_norm": 5.112876892089844, "learning_rate": 2.7384827586206898e-05, "loss": 0.1214, "step": 12944 }, { "epoch": 44.63793103448276, "grad_norm": 0.7488901615142822, "learning_rate": 2.7384367816091953e-05, "loss": 0.1087, "step": 12945 }, { "epoch": 44.641379310344824, "grad_norm": 0.7884544134140015, "learning_rate": 2.7383908045977012e-05, "loss": 0.1083, "step": 12946 }, { "epoch": 44.644827586206894, "grad_norm": 0.6313149333000183, "learning_rate": 2.738344827586207e-05, "loss": 0.1181, "step": 12947 }, { "epoch": 44.648275862068964, "grad_norm": 0.5441499948501587, "learning_rate": 2.7382988505747126e-05, "loss": 0.0933, "step": 12948 }, { "epoch": 44.65172413793103, "grad_norm": 0.6412109136581421, "learning_rate": 2.7382528735632184e-05, "loss": 0.0982, "step": 12949 }, { "epoch": 44.6551724137931, "grad_norm": 0.8882109522819519, "learning_rate": 2.7382068965517243e-05, "loss": 0.0772, "step": 12950 }, { "epoch": 44.65862068965517, "grad_norm": 0.5988144278526306, "learning_rate": 2.73816091954023e-05, "loss": 0.0799, "step": 12951 }, { "epoch": 44.66206896551724, "grad_norm": 0.8469659686088562, "learning_rate": 2.7381149425287357e-05, "loss": 0.0933, "step": 12952 }, { "epoch": 44.66551724137931, "grad_norm": 0.6296398639678955, "learning_rate": 2.7380689655172412e-05, "loss": 0.079, "step": 12953 }, { "epoch": 44.66896551724138, "grad_norm": 0.850063145160675, "learning_rate": 2.7380229885057474e-05, "loss": 0.1018, "step": 12954 }, { "epoch": 44.672413793103445, "grad_norm": 0.6390289068222046, "learning_rate": 2.737977011494253e-05, "loss": 0.0784, "step": 12955 }, { "epoch": 44.675862068965515, "grad_norm": 0.9045057892799377, "learning_rate": 2.7379310344827585e-05, "loss": 0.0737, "step": 12956 }, { "epoch": 44.679310344827584, "grad_norm": 0.642521321773529, "learning_rate": 2.7378850574712644e-05, "loss": 0.0645, "step": 12957 }, { "epoch": 44.682758620689654, "grad_norm": 1.8845158815383911, "learning_rate": 2.7378390804597702e-05, "loss": 0.0809, "step": 12958 }, { "epoch": 44.686206896551724, "grad_norm": 1.1416820287704468, "learning_rate": 2.7377931034482758e-05, "loss": 0.0806, "step": 12959 }, { "epoch": 44.689655172413794, "grad_norm": 1.0998128652572632, "learning_rate": 2.7377471264367816e-05, "loss": 0.1152, "step": 12960 }, { "epoch": 44.69310344827586, "grad_norm": 0.583092987537384, "learning_rate": 2.7377011494252872e-05, "loss": 0.1957, "step": 12961 }, { "epoch": 44.69655172413793, "grad_norm": 0.6268788576126099, "learning_rate": 2.7376551724137934e-05, "loss": 0.1448, "step": 12962 }, { "epoch": 44.7, "grad_norm": 0.5808599591255188, "learning_rate": 2.737609195402299e-05, "loss": 0.1448, "step": 12963 }, { "epoch": 44.703448275862065, "grad_norm": 0.7268388271331787, "learning_rate": 2.7375632183908044e-05, "loss": 0.1355, "step": 12964 }, { "epoch": 44.706896551724135, "grad_norm": 1.622430682182312, "learning_rate": 2.7375172413793103e-05, "loss": 0.1347, "step": 12965 }, { "epoch": 44.710344827586205, "grad_norm": 1.085119605064392, "learning_rate": 2.7374712643678162e-05, "loss": 0.1176, "step": 12966 }, { "epoch": 44.713793103448275, "grad_norm": 0.9233124256134033, "learning_rate": 2.737425287356322e-05, "loss": 0.1524, "step": 12967 }, { "epoch": 44.717241379310344, "grad_norm": 0.8203046917915344, "learning_rate": 2.7373793103448276e-05, "loss": 0.1381, "step": 12968 }, { "epoch": 44.720689655172414, "grad_norm": 0.6262616515159607, "learning_rate": 2.737333333333333e-05, "loss": 0.1202, "step": 12969 }, { "epoch": 44.724137931034484, "grad_norm": 0.6147167086601257, "learning_rate": 2.7372873563218393e-05, "loss": 0.1225, "step": 12970 }, { "epoch": 44.727586206896554, "grad_norm": 0.9148752093315125, "learning_rate": 2.737241379310345e-05, "loss": 0.1099, "step": 12971 }, { "epoch": 44.73103448275862, "grad_norm": 0.7995124459266663, "learning_rate": 2.7371954022988507e-05, "loss": 0.1134, "step": 12972 }, { "epoch": 44.734482758620686, "grad_norm": 0.7170491218566895, "learning_rate": 2.7371494252873562e-05, "loss": 0.0953, "step": 12973 }, { "epoch": 44.737931034482756, "grad_norm": 1.2926443815231323, "learning_rate": 2.737103448275862e-05, "loss": 0.1004, "step": 12974 }, { "epoch": 44.741379310344826, "grad_norm": 0.5239159464836121, "learning_rate": 2.737057471264368e-05, "loss": 0.1038, "step": 12975 }, { "epoch": 44.744827586206895, "grad_norm": 0.9940451383590698, "learning_rate": 2.7370114942528735e-05, "loss": 0.0981, "step": 12976 }, { "epoch": 44.748275862068965, "grad_norm": 0.545352041721344, "learning_rate": 2.7369655172413794e-05, "loss": 0.0984, "step": 12977 }, { "epoch": 44.751724137931035, "grad_norm": 0.7383098006248474, "learning_rate": 2.7369195402298852e-05, "loss": 0.0918, "step": 12978 }, { "epoch": 44.755172413793105, "grad_norm": 0.9005595445632935, "learning_rate": 2.7368735632183908e-05, "loss": 0.1007, "step": 12979 }, { "epoch": 44.758620689655174, "grad_norm": 0.5987162590026855, "learning_rate": 2.7368275862068966e-05, "loss": 0.0759, "step": 12980 }, { "epoch": 44.762068965517244, "grad_norm": 0.7593058943748474, "learning_rate": 2.736781609195402e-05, "loss": 0.1026, "step": 12981 }, { "epoch": 44.765517241379314, "grad_norm": 0.7736737728118896, "learning_rate": 2.7367356321839084e-05, "loss": 0.0678, "step": 12982 }, { "epoch": 44.76896551724138, "grad_norm": 0.9037504196166992, "learning_rate": 2.736689655172414e-05, "loss": 0.0911, "step": 12983 }, { "epoch": 44.772413793103446, "grad_norm": 1.1208466291427612, "learning_rate": 2.7366436781609194e-05, "loss": 0.1007, "step": 12984 }, { "epoch": 44.775862068965516, "grad_norm": 1.1065820455551147, "learning_rate": 2.7365977011494253e-05, "loss": 0.1341, "step": 12985 }, { "epoch": 44.779310344827586, "grad_norm": 0.7351434826850891, "learning_rate": 2.736551724137931e-05, "loss": 0.2017, "step": 12986 }, { "epoch": 44.782758620689656, "grad_norm": 0.608289361000061, "learning_rate": 2.7365057471264367e-05, "loss": 0.1637, "step": 12987 }, { "epoch": 44.786206896551725, "grad_norm": 1.0820201635360718, "learning_rate": 2.7364597701149426e-05, "loss": 0.1438, "step": 12988 }, { "epoch": 44.789655172413795, "grad_norm": 0.6014871597290039, "learning_rate": 2.736413793103448e-05, "loss": 0.1227, "step": 12989 }, { "epoch": 44.793103448275865, "grad_norm": 0.5139683485031128, "learning_rate": 2.7363678160919543e-05, "loss": 0.1338, "step": 12990 }, { "epoch": 44.796551724137935, "grad_norm": 0.5541486740112305, "learning_rate": 2.73632183908046e-05, "loss": 0.1246, "step": 12991 }, { "epoch": 44.8, "grad_norm": 1.0169639587402344, "learning_rate": 2.7362758620689654e-05, "loss": 0.1222, "step": 12992 }, { "epoch": 44.80344827586207, "grad_norm": 0.6509732007980347, "learning_rate": 2.7362298850574712e-05, "loss": 0.1189, "step": 12993 }, { "epoch": 44.80689655172414, "grad_norm": 0.938089907169342, "learning_rate": 2.736183908045977e-05, "loss": 0.1032, "step": 12994 }, { "epoch": 44.810344827586206, "grad_norm": 0.6848647594451904, "learning_rate": 2.736137931034483e-05, "loss": 0.1108, "step": 12995 }, { "epoch": 44.813793103448276, "grad_norm": 0.7621636390686035, "learning_rate": 2.7360919540229885e-05, "loss": 0.1185, "step": 12996 }, { "epoch": 44.817241379310346, "grad_norm": 0.7163059115409851, "learning_rate": 2.736045977011494e-05, "loss": 0.1024, "step": 12997 }, { "epoch": 44.820689655172416, "grad_norm": 0.6938391327857971, "learning_rate": 2.7360000000000002e-05, "loss": 0.1036, "step": 12998 }, { "epoch": 44.824137931034485, "grad_norm": 0.6829192638397217, "learning_rate": 2.7359540229885058e-05, "loss": 0.1147, "step": 12999 }, { "epoch": 44.827586206896555, "grad_norm": 0.5827555060386658, "learning_rate": 2.7359080459770116e-05, "loss": 0.0998, "step": 13000 }, { "epoch": 44.827586206896555, "eval_cer": 0.13734890074486455, "eval_loss": 0.393341600894928, "eval_runtime": 18.3701, "eval_samples_per_second": 50.462, "eval_steps_per_second": 0.163, "eval_wer": 0.3093944099378882, "step": 13000 }, { "epoch": 44.827586206896555, "step": 13000, "total_flos": 4.904690873026415e+20, "train_loss": 0.438659078159871, "train_runtime": 43318.6062, "train_samples_per_second": 535.036, "train_steps_per_second": 1.674 } ], "logging_steps": 1.0, "max_steps": 72500, "num_input_tokens_seen": 0, "num_train_epochs": 250, "save_steps": 1000, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.904690873026415e+20, "train_batch_size": 160, "trial_name": null, "trial_params": null }