diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,12365 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.989351992698509, + "global_step": 2050, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 6.097560975609757e-07, + "loss": 1.2345, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 1.2195121951219514e-06, + "loss": 1.3023, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 1.8292682926829268e-06, + "loss": 1.2241, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 2.4390243902439027e-06, + "loss": 1.2505, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 3.0487804878048782e-06, + "loss": 1.1555, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 3.6585365853658537e-06, + "loss": 1.101, + "step": 6 + }, + { + "epoch": 0.02, + "learning_rate": 4.26829268292683e-06, + "loss": 0.9013, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 4.8780487804878055e-06, + "loss": 0.8904, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 5.487804878048781e-06, + "loss": 0.7205, + "step": 9 + }, + { + "epoch": 0.02, + "learning_rate": 6.0975609756097564e-06, + "loss": 0.6704, + "step": 10 + }, + { + "epoch": 0.03, + "learning_rate": 6.707317073170733e-06, + "loss": 0.6029, + "step": 11 + }, + { + "epoch": 0.03, + "learning_rate": 7.317073170731707e-06, + "loss": 0.5434, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 7.926829268292683e-06, + "loss": 0.5544, + "step": 13 + }, + { + "epoch": 0.03, + "learning_rate": 8.53658536585366e-06, + "loss": 0.5272, + "step": 14 + }, + { + "epoch": 0.04, + "learning_rate": 9.146341463414634e-06, + "loss": 0.504, + "step": 15 + }, + { + "epoch": 0.04, + "learning_rate": 9.756097560975611e-06, + "loss": 0.505, + "step": 16 + }, + { + "epoch": 0.04, + "learning_rate": 1.0365853658536585e-05, + "loss": 0.5116, + "step": 17 + }, + { + "epoch": 0.04, + "learning_rate": 1.0975609756097562e-05, + "loss": 0.5009, + "step": 18 + }, + { + "epoch": 0.05, + "learning_rate": 1.1585365853658537e-05, + "loss": 0.4888, + "step": 19 + }, + { + "epoch": 0.05, + "learning_rate": 1.2195121951219513e-05, + "loss": 0.4531, + "step": 20 + }, + { + "epoch": 0.05, + "learning_rate": 1.2804878048780488e-05, + "loss": 0.4701, + "step": 21 + }, + { + "epoch": 0.05, + "learning_rate": 1.3414634146341466e-05, + "loss": 0.4221, + "step": 22 + }, + { + "epoch": 0.06, + "learning_rate": 1.4024390243902441e-05, + "loss": 0.4427, + "step": 23 + }, + { + "epoch": 0.06, + "learning_rate": 1.4634146341463415e-05, + "loss": 0.4266, + "step": 24 + }, + { + "epoch": 0.06, + "learning_rate": 1.524390243902439e-05, + "loss": 0.4375, + "step": 25 + }, + { + "epoch": 0.06, + "learning_rate": 1.5853658536585366e-05, + "loss": 0.4361, + "step": 26 + }, + { + "epoch": 0.07, + "learning_rate": 1.6463414634146345e-05, + "loss": 0.4482, + "step": 27 + }, + { + "epoch": 0.07, + "learning_rate": 1.707317073170732e-05, + "loss": 0.4398, + "step": 28 + }, + { + "epoch": 0.07, + "learning_rate": 1.7682926829268292e-05, + "loss": 0.4464, + "step": 29 + }, + { + "epoch": 0.07, + "learning_rate": 1.8292682926829268e-05, + "loss": 0.4687, + "step": 30 + }, + { + "epoch": 0.08, + "learning_rate": 1.8902439024390246e-05, + "loss": 0.4459, + "step": 31 + }, + { + "epoch": 0.08, + "learning_rate": 1.9512195121951222e-05, + "loss": 0.4257, + "step": 32 + }, + { + "epoch": 0.08, + "learning_rate": 2.0121951219512197e-05, + "loss": 0.3982, + "step": 33 + }, + { + "epoch": 0.08, + "learning_rate": 2.073170731707317e-05, + "loss": 0.4211, + "step": 34 + }, + { + "epoch": 0.09, + "learning_rate": 2.134146341463415e-05, + "loss": 0.4319, + "step": 35 + }, + { + "epoch": 0.09, + "learning_rate": 2.1951219512195124e-05, + "loss": 0.4641, + "step": 36 + }, + { + "epoch": 0.09, + "learning_rate": 2.25609756097561e-05, + "loss": 0.4335, + "step": 37 + }, + { + "epoch": 0.09, + "learning_rate": 2.3170731707317075e-05, + "loss": 0.4278, + "step": 38 + }, + { + "epoch": 0.09, + "learning_rate": 2.378048780487805e-05, + "loss": 0.3997, + "step": 39 + }, + { + "epoch": 0.1, + "learning_rate": 2.4390243902439026e-05, + "loss": 0.4259, + "step": 40 + }, + { + "epoch": 0.1, + "learning_rate": 2.5e-05, + "loss": 0.4156, + "step": 41 + }, + { + "epoch": 0.1, + "learning_rate": 2.5609756097560977e-05, + "loss": 0.4356, + "step": 42 + }, + { + "epoch": 0.1, + "learning_rate": 2.6219512195121952e-05, + "loss": 0.3966, + "step": 43 + }, + { + "epoch": 0.11, + "learning_rate": 2.682926829268293e-05, + "loss": 0.4271, + "step": 44 + }, + { + "epoch": 0.11, + "learning_rate": 2.7439024390243906e-05, + "loss": 0.4372, + "step": 45 + }, + { + "epoch": 0.11, + "learning_rate": 2.8048780487804882e-05, + "loss": 0.4174, + "step": 46 + }, + { + "epoch": 0.11, + "learning_rate": 2.8658536585365854e-05, + "loss": 0.4342, + "step": 47 + }, + { + "epoch": 0.12, + "learning_rate": 2.926829268292683e-05, + "loss": 0.401, + "step": 48 + }, + { + "epoch": 0.12, + "learning_rate": 2.9878048780487805e-05, + "loss": 0.4027, + "step": 49 + }, + { + "epoch": 0.12, + "learning_rate": 3.048780487804878e-05, + "loss": 0.4319, + "step": 50 + }, + { + "epoch": 0.12, + "learning_rate": 3.109756097560976e-05, + "loss": 0.4345, + "step": 51 + }, + { + "epoch": 0.13, + "learning_rate": 3.170731707317073e-05, + "loss": 0.4136, + "step": 52 + }, + { + "epoch": 0.13, + "learning_rate": 3.231707317073171e-05, + "loss": 0.4233, + "step": 53 + }, + { + "epoch": 0.13, + "learning_rate": 3.292682926829269e-05, + "loss": 0.4089, + "step": 54 + }, + { + "epoch": 0.13, + "learning_rate": 3.353658536585366e-05, + "loss": 0.4379, + "step": 55 + }, + { + "epoch": 0.14, + "learning_rate": 3.414634146341464e-05, + "loss": 0.3893, + "step": 56 + }, + { + "epoch": 0.14, + "learning_rate": 3.475609756097561e-05, + "loss": 0.4188, + "step": 57 + }, + { + "epoch": 0.14, + "learning_rate": 3.5365853658536584e-05, + "loss": 0.4106, + "step": 58 + }, + { + "epoch": 0.14, + "learning_rate": 3.597560975609756e-05, + "loss": 0.45, + "step": 59 + }, + { + "epoch": 0.15, + "learning_rate": 3.6585365853658535e-05, + "loss": 0.3955, + "step": 60 + }, + { + "epoch": 0.15, + "learning_rate": 3.7195121951219514e-05, + "loss": 0.4393, + "step": 61 + }, + { + "epoch": 0.15, + "learning_rate": 3.780487804878049e-05, + "loss": 0.4256, + "step": 62 + }, + { + "epoch": 0.15, + "learning_rate": 3.8414634146341465e-05, + "loss": 0.4139, + "step": 63 + }, + { + "epoch": 0.16, + "learning_rate": 3.9024390243902444e-05, + "loss": 0.4423, + "step": 64 + }, + { + "epoch": 0.16, + "learning_rate": 3.9634146341463416e-05, + "loss": 0.4259, + "step": 65 + }, + { + "epoch": 0.16, + "learning_rate": 4.0243902439024395e-05, + "loss": 0.4225, + "step": 66 + }, + { + "epoch": 0.16, + "learning_rate": 4.085365853658537e-05, + "loss": 0.42, + "step": 67 + }, + { + "epoch": 0.17, + "learning_rate": 4.146341463414634e-05, + "loss": 0.4104, + "step": 68 + }, + { + "epoch": 0.17, + "learning_rate": 4.207317073170732e-05, + "loss": 0.4085, + "step": 69 + }, + { + "epoch": 0.17, + "learning_rate": 4.26829268292683e-05, + "loss": 0.421, + "step": 70 + }, + { + "epoch": 0.17, + "learning_rate": 4.329268292682927e-05, + "loss": 0.3984, + "step": 71 + }, + { + "epoch": 0.18, + "learning_rate": 4.390243902439025e-05, + "loss": 0.4428, + "step": 72 + }, + { + "epoch": 0.18, + "learning_rate": 4.451219512195122e-05, + "loss": 0.4468, + "step": 73 + }, + { + "epoch": 0.18, + "learning_rate": 4.51219512195122e-05, + "loss": 0.4245, + "step": 74 + }, + { + "epoch": 0.18, + "learning_rate": 4.573170731707318e-05, + "loss": 0.4008, + "step": 75 + }, + { + "epoch": 0.18, + "learning_rate": 4.634146341463415e-05, + "loss": 0.4013, + "step": 76 + }, + { + "epoch": 0.19, + "learning_rate": 4.695121951219512e-05, + "loss": 0.4276, + "step": 77 + }, + { + "epoch": 0.19, + "learning_rate": 4.75609756097561e-05, + "loss": 0.4307, + "step": 78 + }, + { + "epoch": 0.19, + "learning_rate": 4.817073170731707e-05, + "loss": 0.3836, + "step": 79 + }, + { + "epoch": 0.19, + "learning_rate": 4.878048780487805e-05, + "loss": 0.4272, + "step": 80 + }, + { + "epoch": 0.2, + "learning_rate": 4.9390243902439024e-05, + "loss": 0.4091, + "step": 81 + }, + { + "epoch": 0.2, + "learning_rate": 5e-05, + "loss": 0.3907, + "step": 82 + }, + { + "epoch": 0.2, + "learning_rate": 4.9999968146329897e-05, + "loss": 0.4449, + "step": 83 + }, + { + "epoch": 0.2, + "learning_rate": 4.9999872585400745e-05, + "loss": 0.3889, + "step": 84 + }, + { + "epoch": 0.21, + "learning_rate": 4.999971331745607e-05, + "loss": 0.4582, + "step": 85 + }, + { + "epoch": 0.21, + "learning_rate": 4.9999490342901726e-05, + "loss": 0.4386, + "step": 86 + }, + { + "epoch": 0.21, + "learning_rate": 4.9999203662305926e-05, + "loss": 0.4174, + "step": 87 + }, + { + "epoch": 0.21, + "learning_rate": 4.9998853276399215e-05, + "loss": 0.4124, + "step": 88 + }, + { + "epoch": 0.22, + "learning_rate": 4.9998439186074476e-05, + "loss": 0.4114, + "step": 89 + }, + { + "epoch": 0.22, + "learning_rate": 4.999796139238694e-05, + "loss": 0.4208, + "step": 90 + }, + { + "epoch": 0.22, + "learning_rate": 4.999741989655415e-05, + "loss": 0.4266, + "step": 91 + }, + { + "epoch": 0.22, + "learning_rate": 4.999681469995601e-05, + "loss": 0.3977, + "step": 92 + }, + { + "epoch": 0.23, + "learning_rate": 4.999614580413473e-05, + "loss": 0.4004, + "step": 93 + }, + { + "epoch": 0.23, + "learning_rate": 4.9995413210794864e-05, + "loss": 0.4481, + "step": 94 + }, + { + "epoch": 0.23, + "learning_rate": 4.9994616921803264e-05, + "loss": 0.4147, + "step": 95 + }, + { + "epoch": 0.23, + "learning_rate": 4.999375693918911e-05, + "loss": 0.4221, + "step": 96 + }, + { + "epoch": 0.24, + "learning_rate": 4.99928332651439e-05, + "loss": 0.428, + "step": 97 + }, + { + "epoch": 0.24, + "learning_rate": 4.999184590202141e-05, + "loss": 0.4283, + "step": 98 + }, + { + "epoch": 0.24, + "learning_rate": 4.999079485233775e-05, + "loss": 0.4324, + "step": 99 + }, + { + "epoch": 0.24, + "learning_rate": 4.9989680118771284e-05, + "loss": 0.4293, + "step": 100 + }, + { + "epoch": 0.25, + "learning_rate": 4.99885017041627e-05, + "loss": 0.4466, + "step": 101 + }, + { + "epoch": 0.25, + "learning_rate": 4.998725961151493e-05, + "loss": 0.4101, + "step": 102 + }, + { + "epoch": 0.25, + "learning_rate": 4.9985953843993194e-05, + "loss": 0.3773, + "step": 103 + }, + { + "epoch": 0.25, + "learning_rate": 4.998458440492497e-05, + "loss": 0.4226, + "step": 104 + }, + { + "epoch": 0.26, + "learning_rate": 4.9983151297800005e-05, + "loss": 0.4156, + "step": 105 + }, + { + "epoch": 0.26, + "learning_rate": 4.998165452627025e-05, + "loss": 0.3961, + "step": 106 + }, + { + "epoch": 0.26, + "learning_rate": 4.9980094094149945e-05, + "loss": 0.4271, + "step": 107 + }, + { + "epoch": 0.26, + "learning_rate": 4.997847000541551e-05, + "loss": 0.4275, + "step": 108 + }, + { + "epoch": 0.27, + "learning_rate": 4.997678226420561e-05, + "loss": 0.3846, + "step": 109 + }, + { + "epoch": 0.27, + "learning_rate": 4.99750308748211e-05, + "loss": 0.4237, + "step": 110 + }, + { + "epoch": 0.27, + "learning_rate": 4.997321584172504e-05, + "loss": 0.4215, + "step": 111 + }, + { + "epoch": 0.27, + "learning_rate": 4.9971337169542665e-05, + "loss": 0.3897, + "step": 112 + }, + { + "epoch": 0.28, + "learning_rate": 4.996939486306138e-05, + "loss": 0.4016, + "step": 113 + }, + { + "epoch": 0.28, + "learning_rate": 4.996738892723075e-05, + "loss": 0.4399, + "step": 114 + }, + { + "epoch": 0.28, + "learning_rate": 4.99653193671625e-05, + "loss": 0.4347, + "step": 115 + }, + { + "epoch": 0.28, + "learning_rate": 4.996318618813046e-05, + "loss": 0.4371, + "step": 116 + }, + { + "epoch": 0.28, + "learning_rate": 4.996098939557062e-05, + "loss": 0.4298, + "step": 117 + }, + { + "epoch": 0.29, + "learning_rate": 4.995872899508103e-05, + "loss": 0.4204, + "step": 118 + }, + { + "epoch": 0.29, + "learning_rate": 4.995640499242187e-05, + "loss": 0.3856, + "step": 119 + }, + { + "epoch": 0.29, + "learning_rate": 4.995401739351536e-05, + "loss": 0.4044, + "step": 120 + }, + { + "epoch": 0.29, + "learning_rate": 4.9951566204445834e-05, + "loss": 0.4019, + "step": 121 + }, + { + "epoch": 0.3, + "learning_rate": 4.9949051431459615e-05, + "loss": 0.4484, + "step": 122 + }, + { + "epoch": 0.3, + "learning_rate": 4.994647308096509e-05, + "loss": 0.4149, + "step": 123 + }, + { + "epoch": 0.3, + "learning_rate": 4.9943831159532665e-05, + "loss": 0.4163, + "step": 124 + }, + { + "epoch": 0.3, + "learning_rate": 4.994112567389471e-05, + "loss": 0.4097, + "step": 125 + }, + { + "epoch": 0.31, + "learning_rate": 4.9938356630945616e-05, + "loss": 0.4045, + "step": 126 + }, + { + "epoch": 0.31, + "learning_rate": 4.99355240377417e-05, + "loss": 0.4257, + "step": 127 + }, + { + "epoch": 0.31, + "learning_rate": 4.993262790150126e-05, + "loss": 0.3949, + "step": 128 + }, + { + "epoch": 0.31, + "learning_rate": 4.99296682296045e-05, + "loss": 0.4253, + "step": 129 + }, + { + "epoch": 0.32, + "learning_rate": 4.992664502959351e-05, + "loss": 0.3911, + "step": 130 + }, + { + "epoch": 0.32, + "learning_rate": 4.992355830917232e-05, + "loss": 0.4163, + "step": 131 + }, + { + "epoch": 0.32, + "learning_rate": 4.992040807620678e-05, + "loss": 0.3949, + "step": 132 + }, + { + "epoch": 0.32, + "learning_rate": 4.9917194338724614e-05, + "loss": 0.4146, + "step": 133 + }, + { + "epoch": 0.33, + "learning_rate": 4.9913917104915374e-05, + "loss": 0.4143, + "step": 134 + }, + { + "epoch": 0.33, + "learning_rate": 4.9910576383130414e-05, + "loss": 0.4096, + "step": 135 + }, + { + "epoch": 0.33, + "learning_rate": 4.990717218188286e-05, + "loss": 0.3887, + "step": 136 + }, + { + "epoch": 0.33, + "learning_rate": 4.990370450984763e-05, + "loss": 0.4135, + "step": 137 + }, + { + "epoch": 0.34, + "learning_rate": 4.990017337586137e-05, + "loss": 0.426, + "step": 138 + }, + { + "epoch": 0.34, + "learning_rate": 4.989657878892244e-05, + "loss": 0.4379, + "step": 139 + }, + { + "epoch": 0.34, + "learning_rate": 4.9892920758190907e-05, + "loss": 0.4185, + "step": 140 + }, + { + "epoch": 0.34, + "learning_rate": 4.988919929298851e-05, + "loss": 0.4309, + "step": 141 + }, + { + "epoch": 0.35, + "learning_rate": 4.9885414402798624e-05, + "loss": 0.4489, + "step": 142 + }, + { + "epoch": 0.35, + "learning_rate": 4.988156609726628e-05, + "loss": 0.3993, + "step": 143 + }, + { + "epoch": 0.35, + "learning_rate": 4.987765438619806e-05, + "loss": 0.4559, + "step": 144 + }, + { + "epoch": 0.35, + "learning_rate": 4.987367927956218e-05, + "loss": 0.4005, + "step": 145 + }, + { + "epoch": 0.36, + "learning_rate": 4.986964078748837e-05, + "loss": 0.3977, + "step": 146 + }, + { + "epoch": 0.36, + "learning_rate": 4.986553892026789e-05, + "loss": 0.4036, + "step": 147 + }, + { + "epoch": 0.36, + "learning_rate": 4.9861373688353504e-05, + "loss": 0.4411, + "step": 148 + }, + { + "epoch": 0.36, + "learning_rate": 4.9857145102359456e-05, + "loss": 0.4303, + "step": 149 + }, + { + "epoch": 0.37, + "learning_rate": 4.985285317306141e-05, + "loss": 0.4416, + "step": 150 + }, + { + "epoch": 0.37, + "learning_rate": 4.984849791139646e-05, + "loss": 0.3917, + "step": 151 + }, + { + "epoch": 0.37, + "learning_rate": 4.984407932846311e-05, + "loss": 0.3887, + "step": 152 + }, + { + "epoch": 0.37, + "learning_rate": 4.983959743552118e-05, + "loss": 0.4235, + "step": 153 + }, + { + "epoch": 0.37, + "learning_rate": 4.9835052243991874e-05, + "loss": 0.3951, + "step": 154 + }, + { + "epoch": 0.38, + "learning_rate": 4.983044376545767e-05, + "loss": 0.3995, + "step": 155 + }, + { + "epoch": 0.38, + "learning_rate": 4.982577201166232e-05, + "loss": 0.3995, + "step": 156 + }, + { + "epoch": 0.38, + "learning_rate": 4.982103699451082e-05, + "loss": 0.4131, + "step": 157 + }, + { + "epoch": 0.38, + "learning_rate": 4.981623872606938e-05, + "loss": 0.4159, + "step": 158 + }, + { + "epoch": 0.39, + "learning_rate": 4.981137721856541e-05, + "loss": 0.4039, + "step": 159 + }, + { + "epoch": 0.39, + "learning_rate": 4.980645248438745e-05, + "loss": 0.442, + "step": 160 + }, + { + "epoch": 0.39, + "learning_rate": 4.980146453608518e-05, + "loss": 0.4113, + "step": 161 + }, + { + "epoch": 0.39, + "learning_rate": 4.979641338636935e-05, + "loss": 0.4177, + "step": 162 + }, + { + "epoch": 0.4, + "learning_rate": 4.979129904811176e-05, + "loss": 0.4017, + "step": 163 + }, + { + "epoch": 0.4, + "learning_rate": 4.9786121534345265e-05, + "loss": 0.4274, + "step": 164 + }, + { + "epoch": 0.4, + "learning_rate": 4.978088085826368e-05, + "loss": 0.4544, + "step": 165 + }, + { + "epoch": 0.4, + "learning_rate": 4.977557703322178e-05, + "loss": 0.39, + "step": 166 + }, + { + "epoch": 0.41, + "learning_rate": 4.977021007273528e-05, + "loss": 0.418, + "step": 167 + }, + { + "epoch": 0.41, + "learning_rate": 4.976477999048077e-05, + "loss": 0.3923, + "step": 168 + }, + { + "epoch": 0.41, + "learning_rate": 4.97592868002957e-05, + "loss": 0.4087, + "step": 169 + }, + { + "epoch": 0.41, + "learning_rate": 4.9753730516178313e-05, + "loss": 0.4061, + "step": 170 + }, + { + "epoch": 0.42, + "learning_rate": 4.974811115228767e-05, + "loss": 0.3747, + "step": 171 + }, + { + "epoch": 0.42, + "learning_rate": 4.9742428722943545e-05, + "loss": 0.399, + "step": 172 + }, + { + "epoch": 0.42, + "learning_rate": 4.973668324262645e-05, + "loss": 0.3833, + "step": 173 + }, + { + "epoch": 0.42, + "learning_rate": 4.973087472597754e-05, + "loss": 0.4333, + "step": 174 + }, + { + "epoch": 0.43, + "learning_rate": 4.972500318779863e-05, + "loss": 0.406, + "step": 175 + }, + { + "epoch": 0.43, + "learning_rate": 4.9719068643052135e-05, + "loss": 0.39, + "step": 176 + }, + { + "epoch": 0.43, + "learning_rate": 4.9713071106860996e-05, + "loss": 0.4317, + "step": 177 + }, + { + "epoch": 0.43, + "learning_rate": 4.970701059450872e-05, + "loss": 0.4173, + "step": 178 + }, + { + "epoch": 0.44, + "learning_rate": 4.9700887121439244e-05, + "loss": 0.3884, + "step": 179 + }, + { + "epoch": 0.44, + "learning_rate": 4.969470070325699e-05, + "loss": 0.3944, + "step": 180 + }, + { + "epoch": 0.44, + "learning_rate": 4.968845135572677e-05, + "loss": 0.4076, + "step": 181 + }, + { + "epoch": 0.44, + "learning_rate": 4.968213909477376e-05, + "loss": 0.4195, + "step": 182 + }, + { + "epoch": 0.45, + "learning_rate": 4.967576393648344e-05, + "loss": 0.4093, + "step": 183 + }, + { + "epoch": 0.45, + "learning_rate": 4.9669325897101604e-05, + "loss": 0.3974, + "step": 184 + }, + { + "epoch": 0.45, + "learning_rate": 4.966282499303424e-05, + "loss": 0.4025, + "step": 185 + }, + { + "epoch": 0.45, + "learning_rate": 4.965626124084759e-05, + "loss": 0.4058, + "step": 186 + }, + { + "epoch": 0.46, + "learning_rate": 4.9649634657267995e-05, + "loss": 0.4007, + "step": 187 + }, + { + "epoch": 0.46, + "learning_rate": 4.964294525918196e-05, + "loss": 0.4218, + "step": 188 + }, + { + "epoch": 0.46, + "learning_rate": 4.963619306363602e-05, + "loss": 0.4141, + "step": 189 + }, + { + "epoch": 0.46, + "learning_rate": 4.962937808783675e-05, + "loss": 0.4233, + "step": 190 + }, + { + "epoch": 0.46, + "learning_rate": 4.9622500349150716e-05, + "loss": 0.3931, + "step": 191 + }, + { + "epoch": 0.47, + "learning_rate": 4.961555986510442e-05, + "loss": 0.4144, + "step": 192 + }, + { + "epoch": 0.47, + "learning_rate": 4.960855665338424e-05, + "loss": 0.3957, + "step": 193 + }, + { + "epoch": 0.47, + "learning_rate": 4.960149073183643e-05, + "loss": 0.3879, + "step": 194 + }, + { + "epoch": 0.47, + "learning_rate": 4.959436211846703e-05, + "loss": 0.4152, + "step": 195 + }, + { + "epoch": 0.48, + "learning_rate": 4.958717083144182e-05, + "loss": 0.4143, + "step": 196 + }, + { + "epoch": 0.48, + "learning_rate": 4.957991688908634e-05, + "loss": 0.3976, + "step": 197 + }, + { + "epoch": 0.48, + "learning_rate": 4.9572600309885744e-05, + "loss": 0.4072, + "step": 198 + }, + { + "epoch": 0.48, + "learning_rate": 4.956522111248483e-05, + "loss": 0.3903, + "step": 199 + }, + { + "epoch": 0.49, + "learning_rate": 4.955777931568797e-05, + "loss": 0.3908, + "step": 200 + }, + { + "epoch": 0.49, + "learning_rate": 4.955027493845903e-05, + "loss": 0.4284, + "step": 201 + }, + { + "epoch": 0.49, + "learning_rate": 4.954270799992138e-05, + "loss": 0.4072, + "step": 202 + }, + { + "epoch": 0.49, + "learning_rate": 4.953507851935779e-05, + "loss": 0.43, + "step": 203 + }, + { + "epoch": 0.5, + "learning_rate": 4.952738651621043e-05, + "loss": 0.4228, + "step": 204 + }, + { + "epoch": 0.5, + "learning_rate": 4.951963201008076e-05, + "loss": 0.3991, + "step": 205 + }, + { + "epoch": 0.5, + "learning_rate": 4.951181502072957e-05, + "loss": 0.4057, + "step": 206 + }, + { + "epoch": 0.5, + "learning_rate": 4.950393556807682e-05, + "loss": 0.3987, + "step": 207 + }, + { + "epoch": 0.51, + "learning_rate": 4.949599367220168e-05, + "loss": 0.4142, + "step": 208 + }, + { + "epoch": 0.51, + "learning_rate": 4.948798935334242e-05, + "loss": 0.3994, + "step": 209 + }, + { + "epoch": 0.51, + "learning_rate": 4.9479922631896405e-05, + "loss": 0.3989, + "step": 210 + }, + { + "epoch": 0.51, + "learning_rate": 4.947179352842001e-05, + "loss": 0.4186, + "step": 211 + }, + { + "epoch": 0.52, + "learning_rate": 4.946360206362858e-05, + "loss": 0.3896, + "step": 212 + }, + { + "epoch": 0.52, + "learning_rate": 4.9455348258396364e-05, + "loss": 0.4122, + "step": 213 + }, + { + "epoch": 0.52, + "learning_rate": 4.944703213375648e-05, + "loss": 0.4319, + "step": 214 + }, + { + "epoch": 0.52, + "learning_rate": 4.9438653710900864e-05, + "loss": 0.3997, + "step": 215 + }, + { + "epoch": 0.53, + "learning_rate": 4.943021301118019e-05, + "loss": 0.3924, + "step": 216 + }, + { + "epoch": 0.53, + "learning_rate": 4.942171005610385e-05, + "loss": 0.3952, + "step": 217 + }, + { + "epoch": 0.53, + "learning_rate": 4.941314486733986e-05, + "loss": 0.4137, + "step": 218 + }, + { + "epoch": 0.53, + "learning_rate": 4.940451746671484e-05, + "loss": 0.4277, + "step": 219 + }, + { + "epoch": 0.54, + "learning_rate": 4.9395827876213936e-05, + "loss": 0.4003, + "step": 220 + }, + { + "epoch": 0.54, + "learning_rate": 4.938707611798078e-05, + "loss": 0.3884, + "step": 221 + }, + { + "epoch": 0.54, + "learning_rate": 4.937826221431742e-05, + "loss": 0.4003, + "step": 222 + }, + { + "epoch": 0.54, + "learning_rate": 4.936938618768426e-05, + "loss": 0.4183, + "step": 223 + }, + { + "epoch": 0.55, + "learning_rate": 4.936044806070004e-05, + "loss": 0.4319, + "step": 224 + }, + { + "epoch": 0.55, + "learning_rate": 4.935144785614173e-05, + "loss": 0.3968, + "step": 225 + }, + { + "epoch": 0.55, + "learning_rate": 4.934238559694448e-05, + "loss": 0.3749, + "step": 226 + }, + { + "epoch": 0.55, + "learning_rate": 4.9333261306201595e-05, + "loss": 0.4044, + "step": 227 + }, + { + "epoch": 0.55, + "learning_rate": 4.932407500716445e-05, + "loss": 0.4067, + "step": 228 + }, + { + "epoch": 0.56, + "learning_rate": 4.9314826723242425e-05, + "loss": 0.417, + "step": 229 + }, + { + "epoch": 0.56, + "learning_rate": 4.9305516478002865e-05, + "loss": 0.4099, + "step": 230 + }, + { + "epoch": 0.56, + "learning_rate": 4.9296144295171024e-05, + "loss": 0.4201, + "step": 231 + }, + { + "epoch": 0.56, + "learning_rate": 4.928671019862995e-05, + "loss": 0.3848, + "step": 232 + }, + { + "epoch": 0.57, + "learning_rate": 4.92772142124205e-05, + "loss": 0.3959, + "step": 233 + }, + { + "epoch": 0.57, + "learning_rate": 4.9267656360741245e-05, + "loss": 0.3794, + "step": 234 + }, + { + "epoch": 0.57, + "learning_rate": 4.925803666794838e-05, + "loss": 0.3956, + "step": 235 + }, + { + "epoch": 0.57, + "learning_rate": 4.924835515855572e-05, + "loss": 0.423, + "step": 236 + }, + { + "epoch": 0.58, + "learning_rate": 4.92386118572346e-05, + "loss": 0.4015, + "step": 237 + }, + { + "epoch": 0.58, + "learning_rate": 4.92288067888138e-05, + "loss": 0.4043, + "step": 238 + }, + { + "epoch": 0.58, + "learning_rate": 4.921893997827951e-05, + "loss": 0.3711, + "step": 239 + }, + { + "epoch": 0.58, + "learning_rate": 4.920901145077527e-05, + "loss": 0.4248, + "step": 240 + }, + { + "epoch": 0.59, + "learning_rate": 4.919902123160187e-05, + "loss": 0.4235, + "step": 241 + }, + { + "epoch": 0.59, + "learning_rate": 4.918896934621734e-05, + "loss": 0.4214, + "step": 242 + }, + { + "epoch": 0.59, + "learning_rate": 4.9178855820236824e-05, + "loss": 0.3827, + "step": 243 + }, + { + "epoch": 0.59, + "learning_rate": 4.916868067943256e-05, + "loss": 0.3948, + "step": 244 + }, + { + "epoch": 0.6, + "learning_rate": 4.915844394973379e-05, + "loss": 0.3697, + "step": 245 + }, + { + "epoch": 0.6, + "learning_rate": 4.914814565722671e-05, + "loss": 0.4164, + "step": 246 + }, + { + "epoch": 0.6, + "learning_rate": 4.9137785828154393e-05, + "loss": 0.3942, + "step": 247 + }, + { + "epoch": 0.6, + "learning_rate": 4.9127364488916716e-05, + "loss": 0.3949, + "step": 248 + }, + { + "epoch": 0.61, + "learning_rate": 4.9116881666070327e-05, + "loss": 0.3867, + "step": 249 + }, + { + "epoch": 0.61, + "learning_rate": 4.9106337386328524e-05, + "loss": 0.3842, + "step": 250 + }, + { + "epoch": 0.61, + "learning_rate": 4.909573167656124e-05, + "loss": 0.3975, + "step": 251 + }, + { + "epoch": 0.61, + "learning_rate": 4.9085064563794925e-05, + "loss": 0.4215, + "step": 252 + }, + { + "epoch": 0.62, + "learning_rate": 4.907433607521251e-05, + "loss": 0.3782, + "step": 253 + }, + { + "epoch": 0.62, + "learning_rate": 4.906354623815336e-05, + "loss": 0.399, + "step": 254 + }, + { + "epoch": 0.62, + "learning_rate": 4.905269508011312e-05, + "loss": 0.4041, + "step": 255 + }, + { + "epoch": 0.62, + "learning_rate": 4.904178262874374e-05, + "loss": 0.3899, + "step": 256 + }, + { + "epoch": 0.63, + "learning_rate": 4.903080891185335e-05, + "loss": 0.3772, + "step": 257 + }, + { + "epoch": 0.63, + "learning_rate": 4.901977395740619e-05, + "loss": 0.4334, + "step": 258 + }, + { + "epoch": 0.63, + "learning_rate": 4.9008677793522584e-05, + "loss": 0.383, + "step": 259 + }, + { + "epoch": 0.63, + "learning_rate": 4.899752044847881e-05, + "loss": 0.4064, + "step": 260 + }, + { + "epoch": 0.64, + "learning_rate": 4.898630195070705e-05, + "loss": 0.3921, + "step": 261 + }, + { + "epoch": 0.64, + "learning_rate": 4.8975022328795325e-05, + "loss": 0.415, + "step": 262 + }, + { + "epoch": 0.64, + "learning_rate": 4.8963681611487445e-05, + "loss": 0.4128, + "step": 263 + }, + { + "epoch": 0.64, + "learning_rate": 4.895227982768287e-05, + "loss": 0.4232, + "step": 264 + }, + { + "epoch": 0.64, + "learning_rate": 4.89408170064367e-05, + "loss": 0.3914, + "step": 265 + }, + { + "epoch": 0.65, + "learning_rate": 4.892929317695957e-05, + "loss": 0.404, + "step": 266 + }, + { + "epoch": 0.65, + "learning_rate": 4.891770836861757e-05, + "loss": 0.4274, + "step": 267 + }, + { + "epoch": 0.65, + "learning_rate": 4.8906062610932215e-05, + "loss": 0.4025, + "step": 268 + }, + { + "epoch": 0.65, + "learning_rate": 4.889435593358029e-05, + "loss": 0.3822, + "step": 269 + }, + { + "epoch": 0.66, + "learning_rate": 4.888258836639386e-05, + "loss": 0.4048, + "step": 270 + }, + { + "epoch": 0.66, + "learning_rate": 4.8870759939360136e-05, + "loss": 0.3952, + "step": 271 + }, + { + "epoch": 0.66, + "learning_rate": 4.885887068262143e-05, + "loss": 0.4112, + "step": 272 + }, + { + "epoch": 0.66, + "learning_rate": 4.884692062647506e-05, + "loss": 0.4039, + "step": 273 + }, + { + "epoch": 0.67, + "learning_rate": 4.8834909801373264e-05, + "loss": 0.4157, + "step": 274 + }, + { + "epoch": 0.67, + "learning_rate": 4.8822838237923166e-05, + "loss": 0.4066, + "step": 275 + }, + { + "epoch": 0.67, + "learning_rate": 4.881070596688664e-05, + "loss": 0.387, + "step": 276 + }, + { + "epoch": 0.67, + "learning_rate": 4.8798513019180295e-05, + "loss": 0.407, + "step": 277 + }, + { + "epoch": 0.68, + "learning_rate": 4.878625942587532e-05, + "loss": 0.4103, + "step": 278 + }, + { + "epoch": 0.68, + "learning_rate": 4.877394521819747e-05, + "loss": 0.411, + "step": 279 + }, + { + "epoch": 0.68, + "learning_rate": 4.8761570427526973e-05, + "loss": 0.3986, + "step": 280 + }, + { + "epoch": 0.68, + "learning_rate": 4.874913508539844e-05, + "loss": 0.3858, + "step": 281 + }, + { + "epoch": 0.69, + "learning_rate": 4.873663922350073e-05, + "loss": 0.4145, + "step": 282 + }, + { + "epoch": 0.69, + "learning_rate": 4.8724082873677027e-05, + "loss": 0.4027, + "step": 283 + }, + { + "epoch": 0.69, + "learning_rate": 4.871146606792455e-05, + "loss": 0.393, + "step": 284 + }, + { + "epoch": 0.69, + "learning_rate": 4.8698788838394644e-05, + "loss": 0.3802, + "step": 285 + }, + { + "epoch": 0.7, + "learning_rate": 4.8686051217392606e-05, + "loss": 0.3923, + "step": 286 + }, + { + "epoch": 0.7, + "learning_rate": 4.867325323737765e-05, + "loss": 0.3985, + "step": 287 + }, + { + "epoch": 0.7, + "learning_rate": 4.866039493096276e-05, + "loss": 0.3941, + "step": 288 + }, + { + "epoch": 0.7, + "learning_rate": 4.86474763309147e-05, + "loss": 0.3776, + "step": 289 + }, + { + "epoch": 0.71, + "learning_rate": 4.863449747015384e-05, + "loss": 0.4265, + "step": 290 + }, + { + "epoch": 0.71, + "learning_rate": 4.862145838175413e-05, + "loss": 0.4001, + "step": 291 + }, + { + "epoch": 0.71, + "learning_rate": 4.860835909894301e-05, + "loss": 0.4198, + "step": 292 + }, + { + "epoch": 0.71, + "learning_rate": 4.859519965510129e-05, + "loss": 0.383, + "step": 293 + }, + { + "epoch": 0.72, + "learning_rate": 4.858198008376308e-05, + "loss": 0.4056, + "step": 294 + }, + { + "epoch": 0.72, + "learning_rate": 4.856870041861575e-05, + "loss": 0.4108, + "step": 295 + }, + { + "epoch": 0.72, + "learning_rate": 4.8555360693499786e-05, + "loss": 0.3703, + "step": 296 + }, + { + "epoch": 0.72, + "learning_rate": 4.8541960942408716e-05, + "loss": 0.3799, + "step": 297 + }, + { + "epoch": 0.73, + "learning_rate": 4.852850119948904e-05, + "loss": 0.3736, + "step": 298 + }, + { + "epoch": 0.73, + "learning_rate": 4.851498149904014e-05, + "loss": 0.3908, + "step": 299 + }, + { + "epoch": 0.73, + "learning_rate": 4.850140187551417e-05, + "loss": 0.3968, + "step": 300 + }, + { + "epoch": 0.73, + "learning_rate": 4.8487762363516024e-05, + "loss": 0.3925, + "step": 301 + }, + { + "epoch": 0.74, + "learning_rate": 4.847406299780316e-05, + "loss": 0.3768, + "step": 302 + }, + { + "epoch": 0.74, + "learning_rate": 4.8460303813285585e-05, + "loss": 0.4419, + "step": 303 + }, + { + "epoch": 0.74, + "learning_rate": 4.844648484502575e-05, + "loss": 0.3688, + "step": 304 + }, + { + "epoch": 0.74, + "learning_rate": 4.843260612823844e-05, + "loss": 0.4208, + "step": 305 + }, + { + "epoch": 0.74, + "learning_rate": 4.8418667698290696e-05, + "loss": 0.4063, + "step": 306 + }, + { + "epoch": 0.75, + "learning_rate": 4.840466959070174e-05, + "loss": 0.3719, + "step": 307 + }, + { + "epoch": 0.75, + "learning_rate": 4.839061184114285e-05, + "loss": 0.3985, + "step": 308 + }, + { + "epoch": 0.75, + "learning_rate": 4.837649448543731e-05, + "loss": 0.3868, + "step": 309 + }, + { + "epoch": 0.75, + "learning_rate": 4.8362317559560274e-05, + "loss": 0.3881, + "step": 310 + }, + { + "epoch": 0.76, + "learning_rate": 4.834808109963873e-05, + "loss": 0.4067, + "step": 311 + }, + { + "epoch": 0.76, + "learning_rate": 4.833378514195133e-05, + "loss": 0.3883, + "step": 312 + }, + { + "epoch": 0.76, + "learning_rate": 4.83194297229284e-05, + "loss": 0.3996, + "step": 313 + }, + { + "epoch": 0.76, + "learning_rate": 4.830501487915174e-05, + "loss": 0.4075, + "step": 314 + }, + { + "epoch": 0.77, + "learning_rate": 4.8290540647354624e-05, + "loss": 0.3918, + "step": 315 + }, + { + "epoch": 0.77, + "learning_rate": 4.8276007064421635e-05, + "loss": 0.4206, + "step": 316 + }, + { + "epoch": 0.77, + "learning_rate": 4.826141416738861e-05, + "loss": 0.3924, + "step": 317 + }, + { + "epoch": 0.77, + "learning_rate": 4.824676199344253e-05, + "loss": 0.3814, + "step": 318 + }, + { + "epoch": 0.78, + "learning_rate": 4.8232050579921445e-05, + "loss": 0.3809, + "step": 319 + }, + { + "epoch": 0.78, + "learning_rate": 4.821727996431435e-05, + "loss": 0.3979, + "step": 320 + }, + { + "epoch": 0.78, + "learning_rate": 4.8202450184261116e-05, + "loss": 0.4201, + "step": 321 + }, + { + "epoch": 0.78, + "learning_rate": 4.8187561277552374e-05, + "loss": 0.3785, + "step": 322 + }, + { + "epoch": 0.79, + "learning_rate": 4.817261328212942e-05, + "loss": 0.3918, + "step": 323 + }, + { + "epoch": 0.79, + "learning_rate": 4.815760623608415e-05, + "loss": 0.3789, + "step": 324 + }, + { + "epoch": 0.79, + "learning_rate": 4.8142540177658925e-05, + "loss": 0.3967, + "step": 325 + }, + { + "epoch": 0.79, + "learning_rate": 4.812741514524647e-05, + "loss": 0.4155, + "step": 326 + }, + { + "epoch": 0.8, + "learning_rate": 4.811223117738981e-05, + "loss": 0.3727, + "step": 327 + }, + { + "epoch": 0.8, + "learning_rate": 4.8096988312782174e-05, + "loss": 0.396, + "step": 328 + }, + { + "epoch": 0.8, + "learning_rate": 4.8081686590266835e-05, + "loss": 0.3694, + "step": 329 + }, + { + "epoch": 0.8, + "learning_rate": 4.806632604883708e-05, + "loss": 0.3919, + "step": 330 + }, + { + "epoch": 0.81, + "learning_rate": 4.8050906727636085e-05, + "loss": 0.3757, + "step": 331 + }, + { + "epoch": 0.81, + "learning_rate": 4.8035428665956806e-05, + "loss": 0.381, + "step": 332 + }, + { + "epoch": 0.81, + "learning_rate": 4.801989190324188e-05, + "loss": 0.3915, + "step": 333 + }, + { + "epoch": 0.81, + "learning_rate": 4.800429647908354e-05, + "loss": 0.3995, + "step": 334 + }, + { + "epoch": 0.82, + "learning_rate": 4.798864243322353e-05, + "loss": 0.4188, + "step": 335 + }, + { + "epoch": 0.82, + "learning_rate": 4.7972929805552926e-05, + "loss": 0.3832, + "step": 336 + }, + { + "epoch": 0.82, + "learning_rate": 4.795715863611212e-05, + "loss": 0.3624, + "step": 337 + }, + { + "epoch": 0.82, + "learning_rate": 4.79413289650907e-05, + "loss": 0.3779, + "step": 338 + }, + { + "epoch": 0.83, + "learning_rate": 4.7925440832827307e-05, + "loss": 0.425, + "step": 339 + }, + { + "epoch": 0.83, + "learning_rate": 4.790949427980956e-05, + "loss": 0.3829, + "step": 340 + }, + { + "epoch": 0.83, + "learning_rate": 4.7893489346673965e-05, + "loss": 0.3877, + "step": 341 + }, + { + "epoch": 0.83, + "learning_rate": 4.7877426074205786e-05, + "loss": 0.4043, + "step": 342 + }, + { + "epoch": 0.83, + "learning_rate": 4.786130450333897e-05, + "loss": 0.3687, + "step": 343 + }, + { + "epoch": 0.84, + "learning_rate": 4.784512467515599e-05, + "loss": 0.3679, + "step": 344 + }, + { + "epoch": 0.84, + "learning_rate": 4.782888663088781e-05, + "loss": 0.3957, + "step": 345 + }, + { + "epoch": 0.84, + "learning_rate": 4.781259041191375e-05, + "loss": 0.4215, + "step": 346 + }, + { + "epoch": 0.84, + "learning_rate": 4.7796236059761346e-05, + "loss": 0.3881, + "step": 347 + }, + { + "epoch": 0.85, + "learning_rate": 4.777982361610629e-05, + "loss": 0.3882, + "step": 348 + }, + { + "epoch": 0.85, + "learning_rate": 4.7763353122772305e-05, + "loss": 0.386, + "step": 349 + }, + { + "epoch": 0.85, + "learning_rate": 4.774682462173105e-05, + "loss": 0.3747, + "step": 350 + }, + { + "epoch": 0.85, + "learning_rate": 4.773023815510199e-05, + "loss": 0.4025, + "step": 351 + }, + { + "epoch": 0.86, + "learning_rate": 4.7713593765152316e-05, + "loss": 0.3759, + "step": 352 + }, + { + "epoch": 0.86, + "learning_rate": 4.7696891494296826e-05, + "loss": 0.3693, + "step": 353 + }, + { + "epoch": 0.86, + "learning_rate": 4.7680131385097806e-05, + "loss": 0.3718, + "step": 354 + }, + { + "epoch": 0.86, + "learning_rate": 4.766331348026493e-05, + "loss": 0.3787, + "step": 355 + }, + { + "epoch": 0.87, + "learning_rate": 4.764643782265516e-05, + "loss": 0.3809, + "step": 356 + }, + { + "epoch": 0.87, + "learning_rate": 4.762950445527264e-05, + "loss": 0.416, + "step": 357 + }, + { + "epoch": 0.87, + "learning_rate": 4.7612513421268544e-05, + "loss": 0.3663, + "step": 358 + }, + { + "epoch": 0.87, + "learning_rate": 4.7595464763941024e-05, + "loss": 0.3872, + "step": 359 + }, + { + "epoch": 0.88, + "learning_rate": 4.7578358526735065e-05, + "loss": 0.3923, + "step": 360 + }, + { + "epoch": 0.88, + "learning_rate": 4.756119475324237e-05, + "loss": 0.3853, + "step": 361 + }, + { + "epoch": 0.88, + "learning_rate": 4.7543973487201286e-05, + "loss": 0.4108, + "step": 362 + }, + { + "epoch": 0.88, + "learning_rate": 4.752669477249666e-05, + "loss": 0.3972, + "step": 363 + }, + { + "epoch": 0.89, + "learning_rate": 4.750935865315971e-05, + "loss": 0.3796, + "step": 364 + }, + { + "epoch": 0.89, + "learning_rate": 4.749196517336798e-05, + "loss": 0.3624, + "step": 365 + }, + { + "epoch": 0.89, + "learning_rate": 4.747451437744515e-05, + "loss": 0.3902, + "step": 366 + }, + { + "epoch": 0.89, + "learning_rate": 4.7457006309860976e-05, + "loss": 0.4268, + "step": 367 + }, + { + "epoch": 0.9, + "learning_rate": 4.7439441015231154e-05, + "loss": 0.3881, + "step": 368 + }, + { + "epoch": 0.9, + "learning_rate": 4.742181853831721e-05, + "loss": 0.3927, + "step": 369 + }, + { + "epoch": 0.9, + "learning_rate": 4.740413892402639e-05, + "loss": 0.4028, + "step": 370 + }, + { + "epoch": 0.9, + "learning_rate": 4.7386402217411555e-05, + "loss": 0.3957, + "step": 371 + }, + { + "epoch": 0.91, + "learning_rate": 4.7368608463671013e-05, + "loss": 0.3859, + "step": 372 + }, + { + "epoch": 0.91, + "learning_rate": 4.7350757708148495e-05, + "loss": 0.4055, + "step": 373 + }, + { + "epoch": 0.91, + "learning_rate": 4.733284999633297e-05, + "loss": 0.4085, + "step": 374 + }, + { + "epoch": 0.91, + "learning_rate": 4.731488537385853e-05, + "loss": 0.3968, + "step": 375 + }, + { + "epoch": 0.92, + "learning_rate": 4.729686388650432e-05, + "loss": 0.4205, + "step": 376 + }, + { + "epoch": 0.92, + "learning_rate": 4.7278785580194365e-05, + "loss": 0.3751, + "step": 377 + }, + { + "epoch": 0.92, + "learning_rate": 4.7260650500997514e-05, + "loss": 0.3866, + "step": 378 + }, + { + "epoch": 0.92, + "learning_rate": 4.724245869512727e-05, + "loss": 0.3916, + "step": 379 + }, + { + "epoch": 0.92, + "learning_rate": 4.722421020894169e-05, + "loss": 0.3858, + "step": 380 + }, + { + "epoch": 0.93, + "learning_rate": 4.7205905088943286e-05, + "loss": 0.4032, + "step": 381 + }, + { + "epoch": 0.93, + "learning_rate": 4.7187543381778864e-05, + "loss": 0.3772, + "step": 382 + }, + { + "epoch": 0.93, + "learning_rate": 4.716912513423945e-05, + "loss": 0.3906, + "step": 383 + }, + { + "epoch": 0.93, + "learning_rate": 4.715065039326015e-05, + "loss": 0.4172, + "step": 384 + }, + { + "epoch": 0.94, + "learning_rate": 4.7132119205920026e-05, + "loss": 0.3682, + "step": 385 + }, + { + "epoch": 0.94, + "learning_rate": 4.7113531619441984e-05, + "loss": 0.3684, + "step": 386 + }, + { + "epoch": 0.94, + "learning_rate": 4.709488768119266e-05, + "loss": 0.4049, + "step": 387 + }, + { + "epoch": 0.94, + "learning_rate": 4.707618743868226e-05, + "loss": 0.3852, + "step": 388 + }, + { + "epoch": 0.95, + "learning_rate": 4.705743093956452e-05, + "loss": 0.4162, + "step": 389 + }, + { + "epoch": 0.95, + "learning_rate": 4.703861823163649e-05, + "loss": 0.353, + "step": 390 + }, + { + "epoch": 0.95, + "learning_rate": 4.7019749362838476e-05, + "loss": 0.3958, + "step": 391 + }, + { + "epoch": 0.95, + "learning_rate": 4.7000824381253905e-05, + "loss": 0.406, + "step": 392 + }, + { + "epoch": 0.96, + "learning_rate": 4.6981843335109174e-05, + "loss": 0.3851, + "step": 393 + }, + { + "epoch": 0.96, + "learning_rate": 4.6962806272773564e-05, + "loss": 0.3828, + "step": 394 + }, + { + "epoch": 0.96, + "learning_rate": 4.69437132427591e-05, + "loss": 0.4331, + "step": 395 + }, + { + "epoch": 0.96, + "learning_rate": 4.6924564293720434e-05, + "loss": 0.3946, + "step": 396 + }, + { + "epoch": 0.97, + "learning_rate": 4.6905359474454705e-05, + "loss": 0.3799, + "step": 397 + }, + { + "epoch": 0.97, + "learning_rate": 4.6886098833901436e-05, + "loss": 0.3543, + "step": 398 + }, + { + "epoch": 0.97, + "learning_rate": 4.686678242114239e-05, + "loss": 0.3772, + "step": 399 + }, + { + "epoch": 0.97, + "learning_rate": 4.684741028540146e-05, + "loss": 0.4009, + "step": 400 + }, + { + "epoch": 0.98, + "learning_rate": 4.6827982476044534e-05, + "loss": 0.3806, + "step": 401 + }, + { + "epoch": 0.98, + "learning_rate": 4.680849904257938e-05, + "loss": 0.3781, + "step": 402 + }, + { + "epoch": 0.98, + "learning_rate": 4.678896003465549e-05, + "loss": 0.4264, + "step": 403 + }, + { + "epoch": 0.98, + "learning_rate": 4.6769365502064025e-05, + "loss": 0.3857, + "step": 404 + }, + { + "epoch": 0.99, + "learning_rate": 4.674971549473757e-05, + "loss": 0.3797, + "step": 405 + }, + { + "epoch": 0.99, + "learning_rate": 4.6730010062750134e-05, + "loss": 0.3847, + "step": 406 + }, + { + "epoch": 0.99, + "learning_rate": 4.671024925631694e-05, + "loss": 0.382, + "step": 407 + }, + { + "epoch": 0.99, + "learning_rate": 4.669043312579433e-05, + "loss": 0.3778, + "step": 408 + }, + { + "epoch": 1.0, + "learning_rate": 4.667056172167962e-05, + "loss": 0.3837, + "step": 409 + }, + { + "epoch": 1.0, + "learning_rate": 4.665063509461097e-05, + "loss": 0.3807, + "step": 410 + }, + { + "epoch": 1.0, + "eval_loss": 0.5687975287437439, + "eval_runtime": 116.1454, + "eval_samples_per_second": 6.561, + "eval_steps_per_second": 0.413, + "step": 410 + }, + { + "epoch": 1.0, + "learning_rate": 4.6630653295367286e-05, + "loss": 0.3618, + "step": 411 + }, + { + "epoch": 1.0, + "learning_rate": 4.6610616374868066e-05, + "loss": 0.2856, + "step": 412 + }, + { + "epoch": 1.01, + "learning_rate": 4.659052438417326e-05, + "loss": 0.2727, + "step": 413 + }, + { + "epoch": 1.01, + "learning_rate": 4.6570377374483154e-05, + "loss": 0.2632, + "step": 414 + }, + { + "epoch": 1.01, + "learning_rate": 4.6550175397138253e-05, + "loss": 0.2758, + "step": 415 + }, + { + "epoch": 1.01, + "learning_rate": 4.652991850361912e-05, + "loss": 0.2561, + "step": 416 + }, + { + "epoch": 1.01, + "learning_rate": 4.650960674554627e-05, + "loss": 0.2807, + "step": 417 + }, + { + "epoch": 1.02, + "learning_rate": 4.648924017468003e-05, + "loss": 0.2686, + "step": 418 + }, + { + "epoch": 1.02, + "learning_rate": 4.64688188429204e-05, + "loss": 0.2584, + "step": 419 + }, + { + "epoch": 1.02, + "learning_rate": 4.644834280230692e-05, + "loss": 0.2368, + "step": 420 + }, + { + "epoch": 1.02, + "learning_rate": 4.6427812105018576e-05, + "loss": 0.2642, + "step": 421 + }, + { + "epoch": 1.03, + "learning_rate": 4.6407226803373586e-05, + "loss": 0.2476, + "step": 422 + }, + { + "epoch": 1.03, + "learning_rate": 4.6386586949829356e-05, + "loss": 0.249, + "step": 423 + }, + { + "epoch": 1.03, + "learning_rate": 4.6365892596982297e-05, + "loss": 0.2541, + "step": 424 + }, + { + "epoch": 1.03, + "learning_rate": 4.634514379756769e-05, + "loss": 0.2785, + "step": 425 + }, + { + "epoch": 1.04, + "learning_rate": 4.632434060445956e-05, + "loss": 0.2369, + "step": 426 + }, + { + "epoch": 1.04, + "learning_rate": 4.630348307067057e-05, + "loss": 0.27, + "step": 427 + }, + { + "epoch": 1.04, + "learning_rate": 4.6282571249351826e-05, + "loss": 0.2603, + "step": 428 + }, + { + "epoch": 1.04, + "learning_rate": 4.626160519379279e-05, + "loss": 0.2498, + "step": 429 + }, + { + "epoch": 1.05, + "learning_rate": 4.624058495742114e-05, + "loss": 0.2654, + "step": 430 + }, + { + "epoch": 1.05, + "learning_rate": 4.621951059380258e-05, + "loss": 0.2316, + "step": 431 + }, + { + "epoch": 1.05, + "learning_rate": 4.619838215664082e-05, + "loss": 0.2515, + "step": 432 + }, + { + "epoch": 1.05, + "learning_rate": 4.6177199699777285e-05, + "loss": 0.2387, + "step": 433 + }, + { + "epoch": 1.06, + "learning_rate": 4.615596327719111e-05, + "loss": 0.2628, + "step": 434 + }, + { + "epoch": 1.06, + "learning_rate": 4.613467294299892e-05, + "loss": 0.2586, + "step": 435 + }, + { + "epoch": 1.06, + "learning_rate": 4.611332875145477e-05, + "loss": 0.2698, + "step": 436 + }, + { + "epoch": 1.06, + "learning_rate": 4.609193075694989e-05, + "loss": 0.254, + "step": 437 + }, + { + "epoch": 1.07, + "learning_rate": 4.607047901401267e-05, + "loss": 0.2585, + "step": 438 + }, + { + "epoch": 1.07, + "learning_rate": 4.604897357730845e-05, + "loss": 0.2311, + "step": 439 + }, + { + "epoch": 1.07, + "learning_rate": 4.60274145016394e-05, + "loss": 0.2714, + "step": 440 + }, + { + "epoch": 1.07, + "learning_rate": 4.600580184194436e-05, + "loss": 0.2536, + "step": 441 + }, + { + "epoch": 1.08, + "learning_rate": 4.598413565329875e-05, + "loss": 0.2485, + "step": 442 + }, + { + "epoch": 1.08, + "learning_rate": 4.5962415990914375e-05, + "loss": 0.2466, + "step": 443 + }, + { + "epoch": 1.08, + "learning_rate": 4.59406429101393e-05, + "loss": 0.2465, + "step": 444 + }, + { + "epoch": 1.08, + "learning_rate": 4.5918816466457746e-05, + "loss": 0.2478, + "step": 445 + }, + { + "epoch": 1.09, + "learning_rate": 4.5896936715489885e-05, + "loss": 0.2733, + "step": 446 + }, + { + "epoch": 1.09, + "learning_rate": 4.587500371299176e-05, + "loss": 0.2444, + "step": 447 + }, + { + "epoch": 1.09, + "learning_rate": 4.585301751485508e-05, + "loss": 0.2629, + "step": 448 + }, + { + "epoch": 1.09, + "learning_rate": 4.583097817710716e-05, + "loss": 0.2702, + "step": 449 + }, + { + "epoch": 1.1, + "learning_rate": 4.580888575591068e-05, + "loss": 0.2694, + "step": 450 + }, + { + "epoch": 1.1, + "learning_rate": 4.5786740307563636e-05, + "loss": 0.2578, + "step": 451 + }, + { + "epoch": 1.1, + "learning_rate": 4.576454188849911e-05, + "loss": 0.2516, + "step": 452 + }, + { + "epoch": 1.1, + "learning_rate": 4.574229055528522e-05, + "loss": 0.2685, + "step": 453 + }, + { + "epoch": 1.1, + "learning_rate": 4.5719986364624866e-05, + "loss": 0.2617, + "step": 454 + }, + { + "epoch": 1.11, + "learning_rate": 4.569762937335569e-05, + "loss": 0.2532, + "step": 455 + }, + { + "epoch": 1.11, + "learning_rate": 4.5675219638449876e-05, + "loss": 0.2885, + "step": 456 + }, + { + "epoch": 1.11, + "learning_rate": 4.5652757217013995e-05, + "loss": 0.2597, + "step": 457 + }, + { + "epoch": 1.11, + "learning_rate": 4.5630242166288895e-05, + "loss": 0.266, + "step": 458 + }, + { + "epoch": 1.12, + "learning_rate": 4.5607674543649546e-05, + "loss": 0.254, + "step": 459 + }, + { + "epoch": 1.12, + "learning_rate": 4.5585054406604864e-05, + "loss": 0.2702, + "step": 460 + }, + { + "epoch": 1.12, + "learning_rate": 4.556238181279761e-05, + "loss": 0.2475, + "step": 461 + }, + { + "epoch": 1.12, + "learning_rate": 4.5539656820004194e-05, + "loss": 0.2458, + "step": 462 + }, + { + "epoch": 1.13, + "learning_rate": 4.551687948613459e-05, + "loss": 0.2492, + "step": 463 + }, + { + "epoch": 1.13, + "learning_rate": 4.5494049869232125e-05, + "loss": 0.269, + "step": 464 + }, + { + "epoch": 1.13, + "learning_rate": 4.5471168027473356e-05, + "loss": 0.2646, + "step": 465 + }, + { + "epoch": 1.13, + "learning_rate": 4.5448234019167945e-05, + "loss": 0.2459, + "step": 466 + }, + { + "epoch": 1.14, + "learning_rate": 4.5425247902758474e-05, + "loss": 0.2762, + "step": 467 + }, + { + "epoch": 1.14, + "learning_rate": 4.540220973682032e-05, + "loss": 0.2511, + "step": 468 + }, + { + "epoch": 1.14, + "learning_rate": 4.537911958006149e-05, + "loss": 0.252, + "step": 469 + }, + { + "epoch": 1.14, + "learning_rate": 4.5355977491322485e-05, + "loss": 0.2679, + "step": 470 + }, + { + "epoch": 1.15, + "learning_rate": 4.5332783529576146e-05, + "loss": 0.2551, + "step": 471 + }, + { + "epoch": 1.15, + "learning_rate": 4.530953775392749e-05, + "loss": 0.2731, + "step": 472 + }, + { + "epoch": 1.15, + "learning_rate": 4.5286240223613584e-05, + "loss": 0.2612, + "step": 473 + }, + { + "epoch": 1.15, + "learning_rate": 4.526289099800337e-05, + "loss": 0.2739, + "step": 474 + }, + { + "epoch": 1.16, + "learning_rate": 4.523949013659753e-05, + "loss": 0.2644, + "step": 475 + }, + { + "epoch": 1.16, + "learning_rate": 4.521603769902835e-05, + "loss": 0.2811, + "step": 476 + }, + { + "epoch": 1.16, + "learning_rate": 4.519253374505949e-05, + "loss": 0.2624, + "step": 477 + }, + { + "epoch": 1.16, + "learning_rate": 4.5168978334585956e-05, + "loss": 0.2552, + "step": 478 + }, + { + "epoch": 1.17, + "learning_rate": 4.514537152763384e-05, + "loss": 0.27, + "step": 479 + }, + { + "epoch": 1.17, + "learning_rate": 4.5121713384360215e-05, + "loss": 0.2652, + "step": 480 + }, + { + "epoch": 1.17, + "learning_rate": 4.5098003965052984e-05, + "loss": 0.2698, + "step": 481 + }, + { + "epoch": 1.17, + "learning_rate": 4.507424333013069e-05, + "loss": 0.2585, + "step": 482 + }, + { + "epoch": 1.18, + "learning_rate": 4.505043154014243e-05, + "loss": 0.2573, + "step": 483 + }, + { + "epoch": 1.18, + "learning_rate": 4.502656865576762e-05, + "loss": 0.2561, + "step": 484 + }, + { + "epoch": 1.18, + "learning_rate": 4.5002654737815905e-05, + "loss": 0.2629, + "step": 485 + }, + { + "epoch": 1.18, + "learning_rate": 4.497868984722697e-05, + "loss": 0.2696, + "step": 486 + }, + { + "epoch": 1.19, + "learning_rate": 4.4954674045070387e-05, + "loss": 0.2727, + "step": 487 + }, + { + "epoch": 1.19, + "learning_rate": 4.493060739254548e-05, + "loss": 0.2718, + "step": 488 + }, + { + "epoch": 1.19, + "learning_rate": 4.4906489950981126e-05, + "loss": 0.2537, + "step": 489 + }, + { + "epoch": 1.19, + "learning_rate": 4.488232178183567e-05, + "loss": 0.2565, + "step": 490 + }, + { + "epoch": 1.2, + "learning_rate": 4.4858102946696676e-05, + "loss": 0.2554, + "step": 491 + }, + { + "epoch": 1.2, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.2904, + "step": 492 + }, + { + "epoch": 1.2, + "learning_rate": 4.4809513525433925e-05, + "loss": 0.262, + "step": 493 + }, + { + "epoch": 1.2, + "learning_rate": 4.478514306313025e-05, + "loss": 0.2537, + "step": 494 + }, + { + "epoch": 1.2, + "learning_rate": 4.476072218247297e-05, + "loss": 0.2583, + "step": 495 + }, + { + "epoch": 1.21, + "learning_rate": 4.4736250945693655e-05, + "loss": 0.2712, + "step": 496 + }, + { + "epoch": 1.21, + "learning_rate": 4.471172941515219e-05, + "loss": 0.257, + "step": 497 + }, + { + "epoch": 1.21, + "learning_rate": 4.468715765333664e-05, + "loss": 0.2617, + "step": 498 + }, + { + "epoch": 1.21, + "learning_rate": 4.466253572286308e-05, + "loss": 0.2528, + "step": 499 + }, + { + "epoch": 1.22, + "learning_rate": 4.46378636864754e-05, + "loss": 0.2711, + "step": 500 + }, + { + "epoch": 1.22, + "learning_rate": 4.46131416070452e-05, + "loss": 0.2568, + "step": 501 + }, + { + "epoch": 1.22, + "learning_rate": 4.458836954757161e-05, + "loss": 0.2702, + "step": 502 + }, + { + "epoch": 1.22, + "learning_rate": 4.4563547571181086e-05, + "loss": 0.2596, + "step": 503 + }, + { + "epoch": 1.23, + "learning_rate": 4.4538675741127326e-05, + "loss": 0.2478, + "step": 504 + }, + { + "epoch": 1.23, + "learning_rate": 4.451375412079106e-05, + "loss": 0.2438, + "step": 505 + }, + { + "epoch": 1.23, + "learning_rate": 4.4488782773679885e-05, + "loss": 0.2797, + "step": 506 + }, + { + "epoch": 1.23, + "learning_rate": 4.4463761763428125e-05, + "loss": 0.2355, + "step": 507 + }, + { + "epoch": 1.24, + "learning_rate": 4.443869115379667e-05, + "loss": 0.2718, + "step": 508 + }, + { + "epoch": 1.24, + "learning_rate": 4.441357100867278e-05, + "loss": 0.2654, + "step": 509 + }, + { + "epoch": 1.24, + "learning_rate": 4.4388401392069975e-05, + "loss": 0.2776, + "step": 510 + }, + { + "epoch": 1.24, + "learning_rate": 4.4363182368127824e-05, + "loss": 0.2631, + "step": 511 + }, + { + "epoch": 1.25, + "learning_rate": 4.433791400111179e-05, + "loss": 0.2599, + "step": 512 + }, + { + "epoch": 1.25, + "learning_rate": 4.4312596355413116e-05, + "loss": 0.2629, + "step": 513 + }, + { + "epoch": 1.25, + "learning_rate": 4.428722949554857e-05, + "loss": 0.25, + "step": 514 + }, + { + "epoch": 1.25, + "learning_rate": 4.426181348616039e-05, + "loss": 0.2557, + "step": 515 + }, + { + "epoch": 1.26, + "learning_rate": 4.4236348392016e-05, + "loss": 0.2793, + "step": 516 + }, + { + "epoch": 1.26, + "learning_rate": 4.421083427800795e-05, + "loss": 0.2641, + "step": 517 + }, + { + "epoch": 1.26, + "learning_rate": 4.41852712091537e-05, + "loss": 0.2696, + "step": 518 + }, + { + "epoch": 1.26, + "learning_rate": 4.415965925059544e-05, + "loss": 0.2637, + "step": 519 + }, + { + "epoch": 1.27, + "learning_rate": 4.413399846759998e-05, + "loss": 0.2772, + "step": 520 + }, + { + "epoch": 1.27, + "learning_rate": 4.4108288925558505e-05, + "loss": 0.2432, + "step": 521 + }, + { + "epoch": 1.27, + "learning_rate": 4.40825306899865e-05, + "loss": 0.2657, + "step": 522 + }, + { + "epoch": 1.27, + "learning_rate": 4.405672382652349e-05, + "loss": 0.2635, + "step": 523 + }, + { + "epoch": 1.28, + "learning_rate": 4.403086840093297e-05, + "loss": 0.2551, + "step": 524 + }, + { + "epoch": 1.28, + "learning_rate": 4.400496447910212e-05, + "loss": 0.2555, + "step": 525 + }, + { + "epoch": 1.28, + "learning_rate": 4.397901212704176e-05, + "loss": 0.2785, + "step": 526 + }, + { + "epoch": 1.28, + "learning_rate": 4.395301141088611e-05, + "loss": 0.2866, + "step": 527 + }, + { + "epoch": 1.29, + "learning_rate": 4.3926962396892606e-05, + "loss": 0.256, + "step": 528 + }, + { + "epoch": 1.29, + "learning_rate": 4.3900865151441796e-05, + "loss": 0.2585, + "step": 529 + }, + { + "epoch": 1.29, + "learning_rate": 4.387471974103713e-05, + "loss": 0.265, + "step": 530 + }, + { + "epoch": 1.29, + "learning_rate": 4.384852623230478e-05, + "loss": 0.2445, + "step": 531 + }, + { + "epoch": 1.29, + "learning_rate": 4.38222846919935e-05, + "loss": 0.2608, + "step": 532 + }, + { + "epoch": 1.3, + "learning_rate": 4.379599518697444e-05, + "loss": 0.2823, + "step": 533 + }, + { + "epoch": 1.3, + "learning_rate": 4.3769657784240976e-05, + "loss": 0.2688, + "step": 534 + }, + { + "epoch": 1.3, + "learning_rate": 4.3743272550908543e-05, + "loss": 0.2572, + "step": 535 + }, + { + "epoch": 1.3, + "learning_rate": 4.371683955421447e-05, + "loss": 0.2635, + "step": 536 + }, + { + "epoch": 1.31, + "learning_rate": 4.369035886151778e-05, + "loss": 0.2713, + "step": 537 + }, + { + "epoch": 1.31, + "learning_rate": 4.366383054029906e-05, + "loss": 0.2651, + "step": 538 + }, + { + "epoch": 1.31, + "learning_rate": 4.363725465816028e-05, + "loss": 0.2487, + "step": 539 + }, + { + "epoch": 1.31, + "learning_rate": 4.3610631282824556e-05, + "loss": 0.2513, + "step": 540 + }, + { + "epoch": 1.32, + "learning_rate": 4.3583960482136085e-05, + "loss": 0.2683, + "step": 541 + }, + { + "epoch": 1.32, + "learning_rate": 4.3557242324059896e-05, + "loss": 0.2706, + "step": 542 + }, + { + "epoch": 1.32, + "learning_rate": 4.3530476876681696e-05, + "loss": 0.2741, + "step": 543 + }, + { + "epoch": 1.32, + "learning_rate": 4.350366420820771e-05, + "loss": 0.238, + "step": 544 + }, + { + "epoch": 1.33, + "learning_rate": 4.347680438696449e-05, + "loss": 0.2656, + "step": 545 + }, + { + "epoch": 1.33, + "learning_rate": 4.344989748139873e-05, + "loss": 0.2534, + "step": 546 + }, + { + "epoch": 1.33, + "learning_rate": 4.342294356007715e-05, + "loss": 0.2832, + "step": 547 + }, + { + "epoch": 1.33, + "learning_rate": 4.339594269168624e-05, + "loss": 0.259, + "step": 548 + }, + { + "epoch": 1.34, + "learning_rate": 4.3368894945032146e-05, + "loss": 0.2734, + "step": 549 + }, + { + "epoch": 1.34, + "learning_rate": 4.334180038904046e-05, + "loss": 0.2805, + "step": 550 + }, + { + "epoch": 1.34, + "learning_rate": 4.331465909275608e-05, + "loss": 0.2837, + "step": 551 + }, + { + "epoch": 1.34, + "learning_rate": 4.3287471125342996e-05, + "loss": 0.2763, + "step": 552 + }, + { + "epoch": 1.35, + "learning_rate": 4.326023655608411e-05, + "loss": 0.2678, + "step": 553 + }, + { + "epoch": 1.35, + "learning_rate": 4.323295545438112e-05, + "loss": 0.2583, + "step": 554 + }, + { + "epoch": 1.35, + "learning_rate": 4.3205627889754286e-05, + "loss": 0.2657, + "step": 555 + }, + { + "epoch": 1.35, + "learning_rate": 4.317825393184226e-05, + "loss": 0.2653, + "step": 556 + }, + { + "epoch": 1.36, + "learning_rate": 4.315083365040192e-05, + "loss": 0.2566, + "step": 557 + }, + { + "epoch": 1.36, + "learning_rate": 4.31233671153082e-05, + "loss": 0.2443, + "step": 558 + }, + { + "epoch": 1.36, + "learning_rate": 4.309585439655389e-05, + "loss": 0.262, + "step": 559 + }, + { + "epoch": 1.36, + "learning_rate": 4.306829556424948e-05, + "loss": 0.2639, + "step": 560 + }, + { + "epoch": 1.37, + "learning_rate": 4.304069068862296e-05, + "loss": 0.2558, + "step": 561 + }, + { + "epoch": 1.37, + "learning_rate": 4.301303984001967e-05, + "loss": 0.2791, + "step": 562 + }, + { + "epoch": 1.37, + "learning_rate": 4.298534308890209e-05, + "loss": 0.2564, + "step": 563 + }, + { + "epoch": 1.37, + "learning_rate": 4.295760050584966e-05, + "loss": 0.2749, + "step": 564 + }, + { + "epoch": 1.38, + "learning_rate": 4.2929812161558636e-05, + "loss": 0.2839, + "step": 565 + }, + { + "epoch": 1.38, + "learning_rate": 4.290197812684188e-05, + "loss": 0.2432, + "step": 566 + }, + { + "epoch": 1.38, + "learning_rate": 4.2874098472628675e-05, + "loss": 0.2716, + "step": 567 + }, + { + "epoch": 1.38, + "learning_rate": 4.284617326996458e-05, + "loss": 0.2628, + "step": 568 + }, + { + "epoch": 1.38, + "learning_rate": 4.28182025900112e-05, + "loss": 0.2797, + "step": 569 + }, + { + "epoch": 1.39, + "learning_rate": 4.279018650404604e-05, + "loss": 0.2704, + "step": 570 + }, + { + "epoch": 1.39, + "learning_rate": 4.276212508346232e-05, + "loss": 0.2701, + "step": 571 + }, + { + "epoch": 1.39, + "learning_rate": 4.273401839976877e-05, + "loss": 0.2713, + "step": 572 + }, + { + "epoch": 1.39, + "learning_rate": 4.270586652458948e-05, + "loss": 0.249, + "step": 573 + }, + { + "epoch": 1.4, + "learning_rate": 4.267766952966369e-05, + "loss": 0.2755, + "step": 574 + }, + { + "epoch": 1.4, + "learning_rate": 4.264942748684563e-05, + "loss": 0.2882, + "step": 575 + }, + { + "epoch": 1.4, + "learning_rate": 4.2621140468104295e-05, + "loss": 0.2686, + "step": 576 + }, + { + "epoch": 1.4, + "learning_rate": 4.2592808545523335e-05, + "loss": 0.2901, + "step": 577 + }, + { + "epoch": 1.41, + "learning_rate": 4.256443179130081e-05, + "loss": 0.251, + "step": 578 + }, + { + "epoch": 1.41, + "learning_rate": 4.2536010277748996e-05, + "loss": 0.2522, + "step": 579 + }, + { + "epoch": 1.41, + "learning_rate": 4.250754407729428e-05, + "loss": 0.2773, + "step": 580 + }, + { + "epoch": 1.41, + "learning_rate": 4.2479033262476884e-05, + "loss": 0.2574, + "step": 581 + }, + { + "epoch": 1.42, + "learning_rate": 4.245047790595075e-05, + "loss": 0.2835, + "step": 582 + }, + { + "epoch": 1.42, + "learning_rate": 4.242187808048329e-05, + "loss": 0.2646, + "step": 583 + }, + { + "epoch": 1.42, + "learning_rate": 4.239323385895527e-05, + "loss": 0.2786, + "step": 584 + }, + { + "epoch": 1.42, + "learning_rate": 4.2364545314360585e-05, + "loss": 0.282, + "step": 585 + }, + { + "epoch": 1.43, + "learning_rate": 4.233581251980604e-05, + "loss": 0.2655, + "step": 586 + }, + { + "epoch": 1.43, + "learning_rate": 4.2307035548511265e-05, + "loss": 0.259, + "step": 587 + }, + { + "epoch": 1.43, + "learning_rate": 4.227821447380842e-05, + "loss": 0.2476, + "step": 588 + }, + { + "epoch": 1.43, + "learning_rate": 4.224934936914206e-05, + "loss": 0.2628, + "step": 589 + }, + { + "epoch": 1.44, + "learning_rate": 4.222044030806894e-05, + "loss": 0.2629, + "step": 590 + }, + { + "epoch": 1.44, + "learning_rate": 4.2191487364257854e-05, + "loss": 0.2776, + "step": 591 + }, + { + "epoch": 1.44, + "learning_rate": 4.216249061148939e-05, + "loss": 0.2549, + "step": 592 + }, + { + "epoch": 1.44, + "learning_rate": 4.21334501236558e-05, + "loss": 0.2513, + "step": 593 + }, + { + "epoch": 1.45, + "learning_rate": 4.210436597476076e-05, + "loss": 0.2596, + "step": 594 + }, + { + "epoch": 1.45, + "learning_rate": 4.207523823891923e-05, + "loss": 0.2767, + "step": 595 + }, + { + "epoch": 1.45, + "learning_rate": 4.2046066990357235e-05, + "loss": 0.2735, + "step": 596 + }, + { + "epoch": 1.45, + "learning_rate": 4.201685230341168e-05, + "loss": 0.2487, + "step": 597 + }, + { + "epoch": 1.46, + "learning_rate": 4.198759425253014e-05, + "loss": 0.2558, + "step": 598 + }, + { + "epoch": 1.46, + "learning_rate": 4.195829291227076e-05, + "loss": 0.2773, + "step": 599 + }, + { + "epoch": 1.46, + "learning_rate": 4.192894835730193e-05, + "loss": 0.2716, + "step": 600 + }, + { + "epoch": 1.46, + "learning_rate": 4.1899560662402206e-05, + "loss": 0.2724, + "step": 601 + }, + { + "epoch": 1.47, + "learning_rate": 4.1870129902460056e-05, + "loss": 0.27, + "step": 602 + }, + { + "epoch": 1.47, + "learning_rate": 4.18406561524737e-05, + "loss": 0.2594, + "step": 603 + }, + { + "epoch": 1.47, + "learning_rate": 4.18111394875509e-05, + "loss": 0.2581, + "step": 604 + }, + { + "epoch": 1.47, + "learning_rate": 4.178157998290879e-05, + "loss": 0.265, + "step": 605 + }, + { + "epoch": 1.47, + "learning_rate": 4.175197771387368e-05, + "loss": 0.2653, + "step": 606 + }, + { + "epoch": 1.48, + "learning_rate": 4.172233275588082e-05, + "loss": 0.2808, + "step": 607 + }, + { + "epoch": 1.48, + "learning_rate": 4.169264518447428e-05, + "loss": 0.27, + "step": 608 + }, + { + "epoch": 1.48, + "learning_rate": 4.16629150753067e-05, + "loss": 0.2522, + "step": 609 + }, + { + "epoch": 1.48, + "learning_rate": 4.163314250413913e-05, + "loss": 0.253, + "step": 610 + }, + { + "epoch": 1.49, + "learning_rate": 4.160332754684084e-05, + "loss": 0.2572, + "step": 611 + }, + { + "epoch": 1.49, + "learning_rate": 4.157347027938907e-05, + "loss": 0.2799, + "step": 612 + }, + { + "epoch": 1.49, + "learning_rate": 4.1543570777868924e-05, + "loss": 0.2816, + "step": 613 + }, + { + "epoch": 1.49, + "learning_rate": 4.151362911847309e-05, + "loss": 0.2859, + "step": 614 + }, + { + "epoch": 1.5, + "learning_rate": 4.148364537750172e-05, + "loss": 0.2601, + "step": 615 + }, + { + "epoch": 1.5, + "learning_rate": 4.1453619631362195e-05, + "loss": 0.2725, + "step": 616 + }, + { + "epoch": 1.5, + "learning_rate": 4.142355195656892e-05, + "loss": 0.2669, + "step": 617 + }, + { + "epoch": 1.5, + "learning_rate": 4.1393442429743166e-05, + "loss": 0.2955, + "step": 618 + }, + { + "epoch": 1.51, + "learning_rate": 4.1363291127612845e-05, + "loss": 0.2655, + "step": 619 + }, + { + "epoch": 1.51, + "learning_rate": 4.1333098127012326e-05, + "loss": 0.2545, + "step": 620 + }, + { + "epoch": 1.51, + "learning_rate": 4.130286350488224e-05, + "loss": 0.2724, + "step": 621 + }, + { + "epoch": 1.51, + "learning_rate": 4.127258733826929e-05, + "loss": 0.2633, + "step": 622 + }, + { + "epoch": 1.52, + "learning_rate": 4.124226970432602e-05, + "loss": 0.2643, + "step": 623 + }, + { + "epoch": 1.52, + "learning_rate": 4.121191068031067e-05, + "loss": 0.2817, + "step": 624 + }, + { + "epoch": 1.52, + "learning_rate": 4.118151034358696e-05, + "loss": 0.263, + "step": 625 + }, + { + "epoch": 1.52, + "learning_rate": 4.1151068771623866e-05, + "loss": 0.2869, + "step": 626 + }, + { + "epoch": 1.53, + "learning_rate": 4.112058604199544e-05, + "loss": 0.2666, + "step": 627 + }, + { + "epoch": 1.53, + "learning_rate": 4.109006223238064e-05, + "loss": 0.2692, + "step": 628 + }, + { + "epoch": 1.53, + "learning_rate": 4.1059497420563094e-05, + "loss": 0.2615, + "step": 629 + }, + { + "epoch": 1.53, + "learning_rate": 4.102889168443091e-05, + "loss": 0.2571, + "step": 630 + }, + { + "epoch": 1.54, + "learning_rate": 4.099824510197649e-05, + "loss": 0.2914, + "step": 631 + }, + { + "epoch": 1.54, + "learning_rate": 4.0967557751296336e-05, + "loss": 0.2808, + "step": 632 + }, + { + "epoch": 1.54, + "learning_rate": 4.093682971059081e-05, + "loss": 0.2658, + "step": 633 + }, + { + "epoch": 1.54, + "learning_rate": 4.0906061058163995e-05, + "loss": 0.2727, + "step": 634 + }, + { + "epoch": 1.55, + "learning_rate": 4.087525187242345e-05, + "loss": 0.2541, + "step": 635 + }, + { + "epoch": 1.55, + "learning_rate": 4.0844402231880016e-05, + "loss": 0.2676, + "step": 636 + }, + { + "epoch": 1.55, + "learning_rate": 4.0813512215147654e-05, + "loss": 0.2555, + "step": 637 + }, + { + "epoch": 1.55, + "learning_rate": 4.078258190094318e-05, + "loss": 0.2597, + "step": 638 + }, + { + "epoch": 1.56, + "learning_rate": 4.075161136808612e-05, + "loss": 0.2589, + "step": 639 + }, + { + "epoch": 1.56, + "learning_rate": 4.0720600695498486e-05, + "loss": 0.2852, + "step": 640 + }, + { + "epoch": 1.56, + "learning_rate": 4.068954996220457e-05, + "loss": 0.2557, + "step": 641 + }, + { + "epoch": 1.56, + "learning_rate": 4.0658459247330766e-05, + "loss": 0.2697, + "step": 642 + }, + { + "epoch": 1.56, + "learning_rate": 4.062732863010534e-05, + "loss": 0.2678, + "step": 643 + }, + { + "epoch": 1.57, + "learning_rate": 4.0596158189858255e-05, + "loss": 0.2631, + "step": 644 + }, + { + "epoch": 1.57, + "learning_rate": 4.0564948006020934e-05, + "loss": 0.2559, + "step": 645 + }, + { + "epoch": 1.57, + "learning_rate": 4.0533698158126085e-05, + "loss": 0.2833, + "step": 646 + }, + { + "epoch": 1.57, + "learning_rate": 4.050240872580749e-05, + "loss": 0.2542, + "step": 647 + }, + { + "epoch": 1.58, + "learning_rate": 4.047107978879985e-05, + "loss": 0.28, + "step": 648 + }, + { + "epoch": 1.58, + "learning_rate": 4.043971142693844e-05, + "loss": 0.2607, + "step": 649 + }, + { + "epoch": 1.58, + "learning_rate": 4.040830372015909e-05, + "loss": 0.278, + "step": 650 + }, + { + "epoch": 1.58, + "learning_rate": 4.037685674849786e-05, + "loss": 0.2569, + "step": 651 + }, + { + "epoch": 1.59, + "learning_rate": 4.034537059209085e-05, + "loss": 0.2844, + "step": 652 + }, + { + "epoch": 1.59, + "learning_rate": 4.0313845331174036e-05, + "loss": 0.2639, + "step": 653 + }, + { + "epoch": 1.59, + "learning_rate": 4.0282281046083045e-05, + "loss": 0.2735, + "step": 654 + }, + { + "epoch": 1.59, + "learning_rate": 4.025067781725294e-05, + "loss": 0.2713, + "step": 655 + }, + { + "epoch": 1.6, + "learning_rate": 4.021903572521802e-05, + "loss": 0.2515, + "step": 656 + }, + { + "epoch": 1.6, + "learning_rate": 4.0187354850611636e-05, + "loss": 0.2651, + "step": 657 + }, + { + "epoch": 1.6, + "learning_rate": 4.015563527416595e-05, + "loss": 0.2788, + "step": 658 + }, + { + "epoch": 1.6, + "learning_rate": 4.012387707671177e-05, + "loss": 0.2753, + "step": 659 + }, + { + "epoch": 1.61, + "learning_rate": 4.00920803391783e-05, + "loss": 0.2589, + "step": 660 + }, + { + "epoch": 1.61, + "learning_rate": 4.0060245142592944e-05, + "loss": 0.2748, + "step": 661 + }, + { + "epoch": 1.61, + "learning_rate": 4.002837156808116e-05, + "loss": 0.2559, + "step": 662 + }, + { + "epoch": 1.61, + "learning_rate": 3.999645969686616e-05, + "loss": 0.2563, + "step": 663 + }, + { + "epoch": 1.62, + "learning_rate": 3.996450961026876e-05, + "loss": 0.251, + "step": 664 + }, + { + "epoch": 1.62, + "learning_rate": 3.9932521389707155e-05, + "loss": 0.2661, + "step": 665 + }, + { + "epoch": 1.62, + "learning_rate": 3.990049511669675e-05, + "loss": 0.2563, + "step": 666 + }, + { + "epoch": 1.62, + "learning_rate": 3.986843087284986e-05, + "loss": 0.2754, + "step": 667 + }, + { + "epoch": 1.63, + "learning_rate": 3.9836328739875615e-05, + "loss": 0.2591, + "step": 668 + }, + { + "epoch": 1.63, + "learning_rate": 3.980418879957967e-05, + "loss": 0.2764, + "step": 669 + }, + { + "epoch": 1.63, + "learning_rate": 3.977201113386402e-05, + "loss": 0.2801, + "step": 670 + }, + { + "epoch": 1.63, + "learning_rate": 3.9739795824726804e-05, + "loss": 0.2768, + "step": 671 + }, + { + "epoch": 1.64, + "learning_rate": 3.9707542954262115e-05, + "loss": 0.2933, + "step": 672 + }, + { + "epoch": 1.64, + "learning_rate": 3.96752526046597e-05, + "loss": 0.2757, + "step": 673 + }, + { + "epoch": 1.64, + "learning_rate": 3.964292485820487e-05, + "loss": 0.2557, + "step": 674 + }, + { + "epoch": 1.64, + "learning_rate": 3.9610559797278216e-05, + "loss": 0.2624, + "step": 675 + }, + { + "epoch": 1.65, + "learning_rate": 3.957815750435542e-05, + "loss": 0.2618, + "step": 676 + }, + { + "epoch": 1.65, + "learning_rate": 3.954571806200702e-05, + "loss": 0.2689, + "step": 677 + }, + { + "epoch": 1.65, + "learning_rate": 3.951324155289825e-05, + "loss": 0.2581, + "step": 678 + }, + { + "epoch": 1.65, + "learning_rate": 3.9480728059788796e-05, + "loss": 0.2589, + "step": 679 + }, + { + "epoch": 1.66, + "learning_rate": 3.9448177665532574e-05, + "loss": 0.2733, + "step": 680 + }, + { + "epoch": 1.66, + "learning_rate": 3.941559045307755e-05, + "loss": 0.2653, + "step": 681 + }, + { + "epoch": 1.66, + "learning_rate": 3.938296650546552e-05, + "loss": 0.2799, + "step": 682 + }, + { + "epoch": 1.66, + "learning_rate": 3.935030590583186e-05, + "loss": 0.2583, + "step": 683 + }, + { + "epoch": 1.66, + "learning_rate": 3.931760873740539e-05, + "loss": 0.271, + "step": 684 + }, + { + "epoch": 1.67, + "learning_rate": 3.9284875083508076e-05, + "loss": 0.2534, + "step": 685 + }, + { + "epoch": 1.67, + "learning_rate": 3.9252105027554887e-05, + "loss": 0.2576, + "step": 686 + }, + { + "epoch": 1.67, + "learning_rate": 3.9219298653053546e-05, + "loss": 0.2464, + "step": 687 + }, + { + "epoch": 1.67, + "learning_rate": 3.918645604360433e-05, + "loss": 0.2738, + "step": 688 + }, + { + "epoch": 1.68, + "learning_rate": 3.915357728289985e-05, + "loss": 0.2593, + "step": 689 + }, + { + "epoch": 1.68, + "learning_rate": 3.9120662454724836e-05, + "loss": 0.2795, + "step": 690 + }, + { + "epoch": 1.68, + "learning_rate": 3.908771164295595e-05, + "loss": 0.2759, + "step": 691 + }, + { + "epoch": 1.68, + "learning_rate": 3.905472493156151e-05, + "loss": 0.2606, + "step": 692 + }, + { + "epoch": 1.69, + "learning_rate": 3.9021702404601366e-05, + "loss": 0.2867, + "step": 693 + }, + { + "epoch": 1.69, + "learning_rate": 3.8988644146226606e-05, + "loss": 0.2693, + "step": 694 + }, + { + "epoch": 1.69, + "learning_rate": 3.8955550240679364e-05, + "loss": 0.2601, + "step": 695 + }, + { + "epoch": 1.69, + "learning_rate": 3.8922420772292644e-05, + "loss": 0.2574, + "step": 696 + }, + { + "epoch": 1.7, + "learning_rate": 3.888925582549006e-05, + "loss": 0.2737, + "step": 697 + }, + { + "epoch": 1.7, + "learning_rate": 3.8856055484785625e-05, + "loss": 0.2752, + "step": 698 + }, + { + "epoch": 1.7, + "learning_rate": 3.882281983478355e-05, + "loss": 0.2807, + "step": 699 + }, + { + "epoch": 1.7, + "learning_rate": 3.878954896017804e-05, + "loss": 0.2779, + "step": 700 + }, + { + "epoch": 1.71, + "learning_rate": 3.875624294575305e-05, + "loss": 0.2837, + "step": 701 + }, + { + "epoch": 1.71, + "learning_rate": 3.872290187638208e-05, + "loss": 0.268, + "step": 702 + }, + { + "epoch": 1.71, + "learning_rate": 3.8689525837027975e-05, + "loss": 0.2621, + "step": 703 + }, + { + "epoch": 1.71, + "learning_rate": 3.865611491274267e-05, + "loss": 0.2694, + "step": 704 + }, + { + "epoch": 1.72, + "learning_rate": 3.8622669188667015e-05, + "loss": 0.2759, + "step": 705 + }, + { + "epoch": 1.72, + "learning_rate": 3.858918875003053e-05, + "loss": 0.2643, + "step": 706 + }, + { + "epoch": 1.72, + "learning_rate": 3.8555673682151215e-05, + "loss": 0.2663, + "step": 707 + }, + { + "epoch": 1.72, + "learning_rate": 3.852212407043528e-05, + "loss": 0.2871, + "step": 708 + }, + { + "epoch": 1.73, + "learning_rate": 3.8488540000377016e-05, + "loss": 0.2718, + "step": 709 + }, + { + "epoch": 1.73, + "learning_rate": 3.8454921557558476e-05, + "loss": 0.2712, + "step": 710 + }, + { + "epoch": 1.73, + "learning_rate": 3.842126882764933e-05, + "loss": 0.2579, + "step": 711 + }, + { + "epoch": 1.73, + "learning_rate": 3.8387581896406606e-05, + "loss": 0.2695, + "step": 712 + }, + { + "epoch": 1.74, + "learning_rate": 3.835386084967451e-05, + "loss": 0.2619, + "step": 713 + }, + { + "epoch": 1.74, + "learning_rate": 3.8320105773384144e-05, + "loss": 0.2744, + "step": 714 + }, + { + "epoch": 1.74, + "learning_rate": 3.828631675355338e-05, + "loss": 0.2606, + "step": 715 + }, + { + "epoch": 1.74, + "learning_rate": 3.8252493876286546e-05, + "loss": 0.2703, + "step": 716 + }, + { + "epoch": 1.75, + "learning_rate": 3.8218637227774276e-05, + "loss": 0.2657, + "step": 717 + }, + { + "epoch": 1.75, + "learning_rate": 3.818474689429323e-05, + "loss": 0.2827, + "step": 718 + }, + { + "epoch": 1.75, + "learning_rate": 3.8150822962205956e-05, + "loss": 0.263, + "step": 719 + }, + { + "epoch": 1.75, + "learning_rate": 3.8116865517960585e-05, + "loss": 0.2702, + "step": 720 + }, + { + "epoch": 1.75, + "learning_rate": 3.808287464809063e-05, + "loss": 0.2659, + "step": 721 + }, + { + "epoch": 1.76, + "learning_rate": 3.8048850439214844e-05, + "loss": 0.2564, + "step": 722 + }, + { + "epoch": 1.76, + "learning_rate": 3.801479297803687e-05, + "loss": 0.2758, + "step": 723 + }, + { + "epoch": 1.76, + "learning_rate": 3.7980702351345146e-05, + "loss": 0.2742, + "step": 724 + }, + { + "epoch": 1.76, + "learning_rate": 3.7946578646012574e-05, + "loss": 0.2741, + "step": 725 + }, + { + "epoch": 1.77, + "learning_rate": 3.791242194899639e-05, + "loss": 0.2695, + "step": 726 + }, + { + "epoch": 1.77, + "learning_rate": 3.7878232347337875e-05, + "loss": 0.2749, + "step": 727 + }, + { + "epoch": 1.77, + "learning_rate": 3.784400992816219e-05, + "loss": 0.2679, + "step": 728 + }, + { + "epoch": 1.77, + "learning_rate": 3.78097547786781e-05, + "loss": 0.2617, + "step": 729 + }, + { + "epoch": 1.78, + "learning_rate": 3.777546698617776e-05, + "loss": 0.2756, + "step": 730 + }, + { + "epoch": 1.78, + "learning_rate": 3.774114663803657e-05, + "loss": 0.2704, + "step": 731 + }, + { + "epoch": 1.78, + "learning_rate": 3.7706793821712826e-05, + "loss": 0.2742, + "step": 732 + }, + { + "epoch": 1.78, + "learning_rate": 3.76724086247476e-05, + "loss": 0.2686, + "step": 733 + }, + { + "epoch": 1.79, + "learning_rate": 3.763799113476447e-05, + "loss": 0.2548, + "step": 734 + }, + { + "epoch": 1.79, + "learning_rate": 3.7603541439469315e-05, + "loss": 0.2788, + "step": 735 + }, + { + "epoch": 1.79, + "learning_rate": 3.756905962665005e-05, + "loss": 0.2525, + "step": 736 + }, + { + "epoch": 1.79, + "learning_rate": 3.753454578417648e-05, + "loss": 0.2758, + "step": 737 + }, + { + "epoch": 1.8, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.2523, + "step": 738 + }, + { + "epoch": 1.8, + "learning_rate": 3.746542236215341e-05, + "loss": 0.2652, + "step": 739 + }, + { + "epoch": 1.8, + "learning_rate": 3.743081295875069e-05, + "loss": 0.2821, + "step": 740 + }, + { + "epoch": 1.8, + "learning_rate": 3.7396171877986764e-05, + "loss": 0.2833, + "step": 741 + }, + { + "epoch": 1.81, + "learning_rate": 3.7361499208137254e-05, + "loss": 0.2846, + "step": 742 + }, + { + "epoch": 1.81, + "learning_rate": 3.732679503755833e-05, + "loss": 0.2651, + "step": 743 + }, + { + "epoch": 1.81, + "learning_rate": 3.72920594546864e-05, + "loss": 0.2594, + "step": 744 + }, + { + "epoch": 1.81, + "learning_rate": 3.725729254803791e-05, + "loss": 0.2776, + "step": 745 + }, + { + "epoch": 1.82, + "learning_rate": 3.722249440620917e-05, + "loss": 0.2637, + "step": 746 + }, + { + "epoch": 1.82, + "learning_rate": 3.718766511787606e-05, + "loss": 0.2872, + "step": 747 + }, + { + "epoch": 1.82, + "learning_rate": 3.715280477179382e-05, + "loss": 0.2563, + "step": 748 + }, + { + "epoch": 1.82, + "learning_rate": 3.7117913456796854e-05, + "loss": 0.2727, + "step": 749 + }, + { + "epoch": 1.83, + "learning_rate": 3.708299126179847e-05, + "loss": 0.2601, + "step": 750 + }, + { + "epoch": 1.83, + "learning_rate": 3.7048038275790694e-05, + "loss": 0.2784, + "step": 751 + }, + { + "epoch": 1.83, + "learning_rate": 3.701305458784397e-05, + "loss": 0.2644, + "step": 752 + }, + { + "epoch": 1.83, + "learning_rate": 3.697804028710703e-05, + "loss": 0.2705, + "step": 753 + }, + { + "epoch": 1.84, + "learning_rate": 3.694299546280657e-05, + "loss": 0.2628, + "step": 754 + }, + { + "epoch": 1.84, + "learning_rate": 3.690792020424712e-05, + "loss": 0.2715, + "step": 755 + }, + { + "epoch": 1.84, + "learning_rate": 3.687281460081071e-05, + "loss": 0.2728, + "step": 756 + }, + { + "epoch": 1.84, + "learning_rate": 3.683767874195674e-05, + "loss": 0.2767, + "step": 757 + }, + { + "epoch": 1.84, + "learning_rate": 3.680251271722169e-05, + "loss": 0.2652, + "step": 758 + }, + { + "epoch": 1.85, + "learning_rate": 3.676731661621893e-05, + "loss": 0.2726, + "step": 759 + }, + { + "epoch": 1.85, + "learning_rate": 3.673209052863843e-05, + "loss": 0.2687, + "step": 760 + }, + { + "epoch": 1.85, + "learning_rate": 3.6696834544246625e-05, + "loss": 0.2561, + "step": 761 + }, + { + "epoch": 1.85, + "learning_rate": 3.666154875288611e-05, + "loss": 0.2781, + "step": 762 + }, + { + "epoch": 1.86, + "learning_rate": 3.662623324447544e-05, + "loss": 0.2867, + "step": 763 + }, + { + "epoch": 1.86, + "learning_rate": 3.65908881090089e-05, + "loss": 0.2711, + "step": 764 + }, + { + "epoch": 1.86, + "learning_rate": 3.655551343655628e-05, + "loss": 0.2668, + "step": 765 + }, + { + "epoch": 1.86, + "learning_rate": 3.652010931726262e-05, + "loss": 0.2522, + "step": 766 + }, + { + "epoch": 1.87, + "learning_rate": 3.648467584134802e-05, + "loss": 0.2658, + "step": 767 + }, + { + "epoch": 1.87, + "learning_rate": 3.6449213099107373e-05, + "loss": 0.2757, + "step": 768 + }, + { + "epoch": 1.87, + "learning_rate": 3.641372118091017e-05, + "loss": 0.2865, + "step": 769 + }, + { + "epoch": 1.87, + "learning_rate": 3.6378200177200224e-05, + "loss": 0.2468, + "step": 770 + }, + { + "epoch": 1.88, + "learning_rate": 3.634265017849549e-05, + "loss": 0.2828, + "step": 771 + }, + { + "epoch": 1.88, + "learning_rate": 3.63070712753878e-05, + "loss": 0.2562, + "step": 772 + }, + { + "epoch": 1.88, + "learning_rate": 3.6271463558542645e-05, + "loss": 0.2701, + "step": 773 + }, + { + "epoch": 1.88, + "learning_rate": 3.623582711869895e-05, + "loss": 0.2851, + "step": 774 + }, + { + "epoch": 1.89, + "learning_rate": 3.620016204666882e-05, + "loss": 0.2844, + "step": 775 + }, + { + "epoch": 1.89, + "learning_rate": 3.616446843333733e-05, + "loss": 0.284, + "step": 776 + }, + { + "epoch": 1.89, + "learning_rate": 3.612874636966228e-05, + "loss": 0.2673, + "step": 777 + }, + { + "epoch": 1.89, + "learning_rate": 3.6092995946673994e-05, + "loss": 0.2512, + "step": 778 + }, + { + "epoch": 1.9, + "learning_rate": 3.6057217255475034e-05, + "loss": 0.2598, + "step": 779 + }, + { + "epoch": 1.9, + "learning_rate": 3.602141038724001e-05, + "loss": 0.2664, + "step": 780 + }, + { + "epoch": 1.9, + "learning_rate": 3.598557543321535e-05, + "loss": 0.2745, + "step": 781 + }, + { + "epoch": 1.9, + "learning_rate": 3.5949712484719014e-05, + "loss": 0.2582, + "step": 782 + }, + { + "epoch": 1.91, + "learning_rate": 3.5913821633140336e-05, + "loss": 0.2668, + "step": 783 + }, + { + "epoch": 1.91, + "learning_rate": 3.5877902969939755e-05, + "loss": 0.2593, + "step": 784 + }, + { + "epoch": 1.91, + "learning_rate": 3.584195658664855e-05, + "loss": 0.2607, + "step": 785 + }, + { + "epoch": 1.91, + "learning_rate": 3.580598257486867e-05, + "loss": 0.2493, + "step": 786 + }, + { + "epoch": 1.92, + "learning_rate": 3.5769981026272475e-05, + "loss": 0.272, + "step": 787 + }, + { + "epoch": 1.92, + "learning_rate": 3.573395203260245e-05, + "loss": 0.2687, + "step": 788 + }, + { + "epoch": 1.92, + "learning_rate": 3.569789568567107e-05, + "loss": 0.2735, + "step": 789 + }, + { + "epoch": 1.92, + "learning_rate": 3.56618120773605e-05, + "loss": 0.254, + "step": 790 + }, + { + "epoch": 1.93, + "learning_rate": 3.5625701299622336e-05, + "loss": 0.2665, + "step": 791 + }, + { + "epoch": 1.93, + "learning_rate": 3.558956344447748e-05, + "loss": 0.2654, + "step": 792 + }, + { + "epoch": 1.93, + "learning_rate": 3.555339860401578e-05, + "loss": 0.2718, + "step": 793 + }, + { + "epoch": 1.93, + "learning_rate": 3.551720687039585e-05, + "loss": 0.2475, + "step": 794 + }, + { + "epoch": 1.93, + "learning_rate": 3.5480988335844886e-05, + "loss": 0.269, + "step": 795 + }, + { + "epoch": 1.94, + "learning_rate": 3.544474309265834e-05, + "loss": 0.2577, + "step": 796 + }, + { + "epoch": 1.94, + "learning_rate": 3.5408471233199716e-05, + "loss": 0.2848, + "step": 797 + }, + { + "epoch": 1.94, + "learning_rate": 3.5372172849900374e-05, + "loss": 0.2677, + "step": 798 + }, + { + "epoch": 1.94, + "learning_rate": 3.533584803525926e-05, + "loss": 0.2583, + "step": 799 + }, + { + "epoch": 1.95, + "learning_rate": 3.529949688184265e-05, + "loss": 0.2596, + "step": 800 + }, + { + "epoch": 1.95, + "learning_rate": 3.526311948228397e-05, + "loss": 0.2552, + "step": 801 + }, + { + "epoch": 1.95, + "learning_rate": 3.5226715929283506e-05, + "loss": 0.2709, + "step": 802 + }, + { + "epoch": 1.95, + "learning_rate": 3.519028631560819e-05, + "loss": 0.2602, + "step": 803 + }, + { + "epoch": 1.96, + "learning_rate": 3.51538307340914e-05, + "loss": 0.2537, + "step": 804 + }, + { + "epoch": 1.96, + "learning_rate": 3.511734927763265e-05, + "loss": 0.274, + "step": 805 + }, + { + "epoch": 1.96, + "learning_rate": 3.508084203919739e-05, + "loss": 0.2442, + "step": 806 + }, + { + "epoch": 1.96, + "learning_rate": 3.5044309111816796e-05, + "loss": 0.2676, + "step": 807 + }, + { + "epoch": 1.97, + "learning_rate": 3.50077505885875e-05, + "loss": 0.2875, + "step": 808 + }, + { + "epoch": 1.97, + "learning_rate": 3.4971166562671324e-05, + "loss": 0.2686, + "step": 809 + }, + { + "epoch": 1.97, + "learning_rate": 3.493455712729514e-05, + "loss": 0.2753, + "step": 810 + }, + { + "epoch": 1.97, + "learning_rate": 3.4897922375750514e-05, + "loss": 0.2703, + "step": 811 + }, + { + "epoch": 1.98, + "learning_rate": 3.4861262401393566e-05, + "loss": 0.2661, + "step": 812 + }, + { + "epoch": 1.98, + "learning_rate": 3.482457729764466e-05, + "loss": 0.2644, + "step": 813 + }, + { + "epoch": 1.98, + "learning_rate": 3.478786715798823e-05, + "loss": 0.3001, + "step": 814 + }, + { + "epoch": 1.98, + "learning_rate": 3.475113207597247e-05, + "loss": 0.269, + "step": 815 + }, + { + "epoch": 1.99, + "learning_rate": 3.4714372145209166e-05, + "loss": 0.2618, + "step": 816 + }, + { + "epoch": 1.99, + "learning_rate": 3.467758745937342e-05, + "loss": 0.2592, + "step": 817 + }, + { + "epoch": 1.99, + "learning_rate": 3.46407781122034e-05, + "loss": 0.2805, + "step": 818 + }, + { + "epoch": 1.99, + "learning_rate": 3.460394419750013e-05, + "loss": 0.2432, + "step": 819 + }, + { + "epoch": 2.0, + "learning_rate": 3.456708580912725e-05, + "loss": 0.2524, + "step": 820 + }, + { + "epoch": 2.0, + "learning_rate": 3.4530203041010745e-05, + "loss": 0.2529, + "step": 821 + }, + { + "epoch": 2.0, + "eval_loss": 0.5900537967681885, + "eval_runtime": 116.0624, + "eval_samples_per_second": 6.565, + "eval_steps_per_second": 0.414, + "step": 821 + }, + { + "epoch": 2.0, + "learning_rate": 3.449329598713874e-05, + "loss": 0.2271, + "step": 822 + }, + { + "epoch": 2.0, + "learning_rate": 3.445636474156125e-05, + "loss": 0.1519, + "step": 823 + }, + { + "epoch": 2.01, + "learning_rate": 3.4419409398389935e-05, + "loss": 0.1477, + "step": 824 + }, + { + "epoch": 2.01, + "learning_rate": 3.438243005179784e-05, + "loss": 0.1407, + "step": 825 + }, + { + "epoch": 2.01, + "learning_rate": 3.434542679601922e-05, + "loss": 0.1235, + "step": 826 + }, + { + "epoch": 2.01, + "learning_rate": 3.4308399725349226e-05, + "loss": 0.1323, + "step": 827 + }, + { + "epoch": 2.02, + "learning_rate": 3.42713489341437e-05, + "loss": 0.1445, + "step": 828 + }, + { + "epoch": 2.02, + "learning_rate": 3.423427451681895e-05, + "loss": 0.1257, + "step": 829 + }, + { + "epoch": 2.02, + "learning_rate": 3.419717656785146e-05, + "loss": 0.1249, + "step": 830 + }, + { + "epoch": 2.02, + "learning_rate": 3.416005518177771e-05, + "loss": 0.1279, + "step": 831 + }, + { + "epoch": 2.02, + "learning_rate": 3.4122910453193885e-05, + "loss": 0.1278, + "step": 832 + }, + { + "epoch": 2.03, + "learning_rate": 3.408574247675566e-05, + "loss": 0.1207, + "step": 833 + }, + { + "epoch": 2.03, + "learning_rate": 3.4048551347177945e-05, + "loss": 0.1184, + "step": 834 + }, + { + "epoch": 2.03, + "learning_rate": 3.401133715923467e-05, + "loss": 0.1308, + "step": 835 + }, + { + "epoch": 2.03, + "learning_rate": 3.3974100007758514e-05, + "loss": 0.1291, + "step": 836 + }, + { + "epoch": 2.04, + "learning_rate": 3.3936839987640664e-05, + "loss": 0.1129, + "step": 837 + }, + { + "epoch": 2.04, + "learning_rate": 3.389955719383058e-05, + "loss": 0.1264, + "step": 838 + }, + { + "epoch": 2.04, + "learning_rate": 3.3862251721335794e-05, + "loss": 0.1149, + "step": 839 + }, + { + "epoch": 2.04, + "learning_rate": 3.382492366522158e-05, + "loss": 0.1234, + "step": 840 + }, + { + "epoch": 2.05, + "learning_rate": 3.378757312061079e-05, + "loss": 0.1245, + "step": 841 + }, + { + "epoch": 2.05, + "learning_rate": 3.375020018268359e-05, + "loss": 0.1154, + "step": 842 + }, + { + "epoch": 2.05, + "learning_rate": 3.371280494667719e-05, + "loss": 0.1231, + "step": 843 + }, + { + "epoch": 2.05, + "learning_rate": 3.367538750788563e-05, + "loss": 0.1224, + "step": 844 + }, + { + "epoch": 2.06, + "learning_rate": 3.363794796165953e-05, + "loss": 0.1196, + "step": 845 + }, + { + "epoch": 2.06, + "learning_rate": 3.360048640340585e-05, + "loss": 0.1189, + "step": 846 + }, + { + "epoch": 2.06, + "learning_rate": 3.3563002928587627e-05, + "loss": 0.1173, + "step": 847 + }, + { + "epoch": 2.06, + "learning_rate": 3.352549763272379e-05, + "loss": 0.1248, + "step": 848 + }, + { + "epoch": 2.07, + "learning_rate": 3.348797061138881e-05, + "loss": 0.1255, + "step": 849 + }, + { + "epoch": 2.07, + "learning_rate": 3.3450421960212566e-05, + "loss": 0.1178, + "step": 850 + }, + { + "epoch": 2.07, + "learning_rate": 3.3412851774880064e-05, + "loss": 0.121, + "step": 851 + }, + { + "epoch": 2.07, + "learning_rate": 3.337526015113115e-05, + "loss": 0.1186, + "step": 852 + }, + { + "epoch": 2.08, + "learning_rate": 3.3337647184760315e-05, + "loss": 0.1191, + "step": 853 + }, + { + "epoch": 2.08, + "learning_rate": 3.3300012971616467e-05, + "loss": 0.1223, + "step": 854 + }, + { + "epoch": 2.08, + "learning_rate": 3.3262357607602596e-05, + "loss": 0.1104, + "step": 855 + }, + { + "epoch": 2.08, + "learning_rate": 3.322468118867564e-05, + "loss": 0.125, + "step": 856 + }, + { + "epoch": 2.09, + "learning_rate": 3.318698381084619e-05, + "loss": 0.1221, + "step": 857 + }, + { + "epoch": 2.09, + "learning_rate": 3.314926557017821e-05, + "loss": 0.1181, + "step": 858 + }, + { + "epoch": 2.09, + "learning_rate": 3.3111526562788864e-05, + "loss": 0.1197, + "step": 859 + }, + { + "epoch": 2.09, + "learning_rate": 3.3073766884848234e-05, + "loss": 0.1168, + "step": 860 + }, + { + "epoch": 2.1, + "learning_rate": 3.303598663257904e-05, + "loss": 0.1186, + "step": 861 + }, + { + "epoch": 2.1, + "learning_rate": 3.299818590225647e-05, + "loss": 0.1192, + "step": 862 + }, + { + "epoch": 2.1, + "learning_rate": 3.29603647902079e-05, + "loss": 0.1192, + "step": 863 + }, + { + "epoch": 2.1, + "learning_rate": 3.2922523392812605e-05, + "loss": 0.1285, + "step": 864 + }, + { + "epoch": 2.11, + "learning_rate": 3.2884661806501574e-05, + "loss": 0.1299, + "step": 865 + }, + { + "epoch": 2.11, + "learning_rate": 3.284678012775727e-05, + "loss": 0.1322, + "step": 866 + }, + { + "epoch": 2.11, + "learning_rate": 3.280887845311332e-05, + "loss": 0.1174, + "step": 867 + }, + { + "epoch": 2.11, + "learning_rate": 3.27709568791543e-05, + "loss": 0.1225, + "step": 868 + }, + { + "epoch": 2.11, + "learning_rate": 3.273301550251555e-05, + "loss": 0.1194, + "step": 869 + }, + { + "epoch": 2.12, + "learning_rate": 3.269505441988281e-05, + "loss": 0.1139, + "step": 870 + }, + { + "epoch": 2.12, + "learning_rate": 3.265707372799208e-05, + "loss": 0.1294, + "step": 871 + }, + { + "epoch": 2.12, + "learning_rate": 3.2619073523629304e-05, + "loss": 0.1244, + "step": 872 + }, + { + "epoch": 2.12, + "learning_rate": 3.258105390363016e-05, + "loss": 0.1284, + "step": 873 + }, + { + "epoch": 2.13, + "learning_rate": 3.2543014964879816e-05, + "loss": 0.1234, + "step": 874 + }, + { + "epoch": 2.13, + "learning_rate": 3.250495680431264e-05, + "loss": 0.1163, + "step": 875 + }, + { + "epoch": 2.13, + "learning_rate": 3.246687951891201e-05, + "loss": 0.1269, + "step": 876 + }, + { + "epoch": 2.13, + "learning_rate": 3.2428783205710026e-05, + "loss": 0.1174, + "step": 877 + }, + { + "epoch": 2.14, + "learning_rate": 3.2390667961787275e-05, + "loss": 0.1226, + "step": 878 + }, + { + "epoch": 2.14, + "learning_rate": 3.23525338842726e-05, + "loss": 0.1174, + "step": 879 + }, + { + "epoch": 2.14, + "learning_rate": 3.231438107034281e-05, + "loss": 0.1212, + "step": 880 + }, + { + "epoch": 2.14, + "learning_rate": 3.22762096172225e-05, + "loss": 0.1189, + "step": 881 + }, + { + "epoch": 2.15, + "learning_rate": 3.223801962218372e-05, + "loss": 0.1232, + "step": 882 + }, + { + "epoch": 2.15, + "learning_rate": 3.21998111825458e-05, + "loss": 0.1271, + "step": 883 + }, + { + "epoch": 2.15, + "learning_rate": 3.216158439567506e-05, + "loss": 0.1229, + "step": 884 + }, + { + "epoch": 2.15, + "learning_rate": 3.2123339358984575e-05, + "loss": 0.1144, + "step": 885 + }, + { + "epoch": 2.16, + "learning_rate": 3.208507616993393e-05, + "loss": 0.1251, + "step": 886 + }, + { + "epoch": 2.16, + "learning_rate": 3.2046794926028964e-05, + "loss": 0.1176, + "step": 887 + }, + { + "epoch": 2.16, + "learning_rate": 3.200849572482153e-05, + "loss": 0.1346, + "step": 888 + }, + { + "epoch": 2.16, + "learning_rate": 3.1970178663909236e-05, + "loss": 0.1159, + "step": 889 + }, + { + "epoch": 2.17, + "learning_rate": 3.19318438409352e-05, + "loss": 0.1366, + "step": 890 + }, + { + "epoch": 2.17, + "learning_rate": 3.189349135358781e-05, + "loss": 0.1249, + "step": 891 + }, + { + "epoch": 2.17, + "learning_rate": 3.1855121299600456e-05, + "loss": 0.1261, + "step": 892 + }, + { + "epoch": 2.17, + "learning_rate": 3.181673377675131e-05, + "loss": 0.1217, + "step": 893 + }, + { + "epoch": 2.18, + "learning_rate": 3.1778328882863054e-05, + "loss": 0.1191, + "step": 894 + }, + { + "epoch": 2.18, + "learning_rate": 3.173990671580263e-05, + "loss": 0.1155, + "step": 895 + }, + { + "epoch": 2.18, + "learning_rate": 3.1701467373480995e-05, + "loss": 0.1107, + "step": 896 + }, + { + "epoch": 2.18, + "learning_rate": 3.166301095385288e-05, + "loss": 0.1177, + "step": 897 + }, + { + "epoch": 2.19, + "learning_rate": 3.162453755491655e-05, + "loss": 0.1212, + "step": 898 + }, + { + "epoch": 2.19, + "learning_rate": 3.1586047274713494e-05, + "loss": 0.126, + "step": 899 + }, + { + "epoch": 2.19, + "learning_rate": 3.154754021132827e-05, + "loss": 0.1171, + "step": 900 + }, + { + "epoch": 2.19, + "learning_rate": 3.1509016462888174e-05, + "loss": 0.1225, + "step": 901 + }, + { + "epoch": 2.2, + "learning_rate": 3.147047612756302e-05, + "loss": 0.1315, + "step": 902 + }, + { + "epoch": 2.2, + "learning_rate": 3.143191930356491e-05, + "loss": 0.1207, + "step": 903 + }, + { + "epoch": 2.2, + "learning_rate": 3.139334608914795e-05, + "loss": 0.1307, + "step": 904 + }, + { + "epoch": 2.2, + "learning_rate": 3.135475658260801e-05, + "loss": 0.1163, + "step": 905 + }, + { + "epoch": 2.21, + "learning_rate": 3.131615088228249e-05, + "loss": 0.1162, + "step": 906 + }, + { + "epoch": 2.21, + "learning_rate": 3.127752908655004e-05, + "loss": 0.1234, + "step": 907 + }, + { + "epoch": 2.21, + "learning_rate": 3.123889129383034e-05, + "loss": 0.1168, + "step": 908 + }, + { + "epoch": 2.21, + "learning_rate": 3.1200237602583834e-05, + "loss": 0.1238, + "step": 909 + }, + { + "epoch": 2.21, + "learning_rate": 3.116156811131148e-05, + "loss": 0.1255, + "step": 910 + }, + { + "epoch": 2.22, + "learning_rate": 3.112288291855449e-05, + "loss": 0.124, + "step": 911 + }, + { + "epoch": 2.22, + "learning_rate": 3.108418212289408e-05, + "loss": 0.1267, + "step": 912 + }, + { + "epoch": 2.22, + "learning_rate": 3.104546582295126e-05, + "loss": 0.124, + "step": 913 + }, + { + "epoch": 2.22, + "learning_rate": 3.100673411738652e-05, + "loss": 0.1308, + "step": 914 + }, + { + "epoch": 2.23, + "learning_rate": 3.096798710489962e-05, + "loss": 0.1213, + "step": 915 + }, + { + "epoch": 2.23, + "learning_rate": 3.092922488422933e-05, + "loss": 0.1277, + "step": 916 + }, + { + "epoch": 2.23, + "learning_rate": 3.089044755415315e-05, + "loss": 0.1201, + "step": 917 + }, + { + "epoch": 2.23, + "learning_rate": 3.0851655213487124e-05, + "loss": 0.115, + "step": 918 + }, + { + "epoch": 2.24, + "learning_rate": 3.0812847961085526e-05, + "loss": 0.1257, + "step": 919 + }, + { + "epoch": 2.24, + "learning_rate": 3.077402589584061e-05, + "loss": 0.1203, + "step": 920 + }, + { + "epoch": 2.24, + "learning_rate": 3.0735189116682414e-05, + "loss": 0.1255, + "step": 921 + }, + { + "epoch": 2.24, + "learning_rate": 3.0696337722578444e-05, + "loss": 0.1215, + "step": 922 + }, + { + "epoch": 2.25, + "learning_rate": 3.065747181253346e-05, + "loss": 0.1275, + "step": 923 + }, + { + "epoch": 2.25, + "learning_rate": 3.0618591485589224e-05, + "loss": 0.1346, + "step": 924 + }, + { + "epoch": 2.25, + "learning_rate": 3.0579696840824206e-05, + "loss": 0.1285, + "step": 925 + }, + { + "epoch": 2.25, + "learning_rate": 3.05407879773534e-05, + "loss": 0.1261, + "step": 926 + }, + { + "epoch": 2.26, + "learning_rate": 3.0501864994328e-05, + "loss": 0.1192, + "step": 927 + }, + { + "epoch": 2.26, + "learning_rate": 3.04629279909352e-05, + "loss": 0.1186, + "step": 928 + }, + { + "epoch": 2.26, + "learning_rate": 3.0423977066397912e-05, + "loss": 0.1244, + "step": 929 + }, + { + "epoch": 2.26, + "learning_rate": 3.0385012319974537e-05, + "loss": 0.1248, + "step": 930 + }, + { + "epoch": 2.27, + "learning_rate": 3.034603385095868e-05, + "loss": 0.1155, + "step": 931 + }, + { + "epoch": 2.27, + "learning_rate": 3.0307041758678932e-05, + "loss": 0.1267, + "step": 932 + }, + { + "epoch": 2.27, + "learning_rate": 3.0268036142498596e-05, + "loss": 0.1219, + "step": 933 + }, + { + "epoch": 2.27, + "learning_rate": 3.022901710181542e-05, + "loss": 0.126, + "step": 934 + }, + { + "epoch": 2.28, + "learning_rate": 3.018998473606139e-05, + "loss": 0.1219, + "step": 935 + }, + { + "epoch": 2.28, + "learning_rate": 3.0150939144702423e-05, + "loss": 0.1208, + "step": 936 + }, + { + "epoch": 2.28, + "learning_rate": 3.011188042723816e-05, + "loss": 0.1234, + "step": 937 + }, + { + "epoch": 2.28, + "learning_rate": 3.007280868320167e-05, + "loss": 0.1252, + "step": 938 + }, + { + "epoch": 2.29, + "learning_rate": 3.0033724012159242e-05, + "loss": 0.1185, + "step": 939 + }, + { + "epoch": 2.29, + "learning_rate": 2.9994626513710084e-05, + "loss": 0.1194, + "step": 940 + }, + { + "epoch": 2.29, + "learning_rate": 2.99555162874861e-05, + "loss": 0.1238, + "step": 941 + }, + { + "epoch": 2.29, + "learning_rate": 2.9916393433151634e-05, + "loss": 0.1208, + "step": 942 + }, + { + "epoch": 2.3, + "learning_rate": 2.9877258050403212e-05, + "loss": 0.1218, + "step": 943 + }, + { + "epoch": 2.3, + "learning_rate": 2.9838110238969264e-05, + "loss": 0.1254, + "step": 944 + }, + { + "epoch": 2.3, + "learning_rate": 2.9798950098609923e-05, + "loss": 0.1208, + "step": 945 + }, + { + "epoch": 2.3, + "learning_rate": 2.975977772911671e-05, + "loss": 0.1211, + "step": 946 + }, + { + "epoch": 2.3, + "learning_rate": 2.9720593230312337e-05, + "loss": 0.1177, + "step": 947 + }, + { + "epoch": 2.31, + "learning_rate": 2.9681396702050406e-05, + "loss": 0.1187, + "step": 948 + }, + { + "epoch": 2.31, + "learning_rate": 2.964218824421518e-05, + "loss": 0.125, + "step": 949 + }, + { + "epoch": 2.31, + "learning_rate": 2.9602967956721316e-05, + "loss": 0.1174, + "step": 950 + }, + { + "epoch": 2.31, + "learning_rate": 2.9563735939513636e-05, + "loss": 0.1167, + "step": 951 + }, + { + "epoch": 2.32, + "learning_rate": 2.9524492292566823e-05, + "loss": 0.1175, + "step": 952 + }, + { + "epoch": 2.32, + "learning_rate": 2.948523711588522e-05, + "loss": 0.1295, + "step": 953 + }, + { + "epoch": 2.32, + "learning_rate": 2.9445970509502546e-05, + "loss": 0.1336, + "step": 954 + }, + { + "epoch": 2.32, + "learning_rate": 2.940669257348163e-05, + "loss": 0.1218, + "step": 955 + }, + { + "epoch": 2.33, + "learning_rate": 2.9367403407914202e-05, + "loss": 0.1205, + "step": 956 + }, + { + "epoch": 2.33, + "learning_rate": 2.932810311292058e-05, + "loss": 0.1311, + "step": 957 + }, + { + "epoch": 2.33, + "learning_rate": 2.9288791788649462e-05, + "loss": 0.1258, + "step": 958 + }, + { + "epoch": 2.33, + "learning_rate": 2.9249469535277636e-05, + "loss": 0.1255, + "step": 959 + }, + { + "epoch": 2.34, + "learning_rate": 2.921013645300975e-05, + "loss": 0.1263, + "step": 960 + }, + { + "epoch": 2.34, + "learning_rate": 2.9170792642078055e-05, + "loss": 0.1219, + "step": 961 + }, + { + "epoch": 2.34, + "learning_rate": 2.9131438202742124e-05, + "loss": 0.1224, + "step": 962 + }, + { + "epoch": 2.34, + "learning_rate": 2.909207323528863e-05, + "loss": 0.122, + "step": 963 + }, + { + "epoch": 2.35, + "learning_rate": 2.9052697840031064e-05, + "loss": 0.1199, + "step": 964 + }, + { + "epoch": 2.35, + "learning_rate": 2.9013312117309488e-05, + "loss": 0.1206, + "step": 965 + }, + { + "epoch": 2.35, + "learning_rate": 2.8973916167490307e-05, + "loss": 0.1163, + "step": 966 + }, + { + "epoch": 2.35, + "learning_rate": 2.8934510090965944e-05, + "loss": 0.1222, + "step": 967 + }, + { + "epoch": 2.36, + "learning_rate": 2.889509398815467e-05, + "loss": 0.114, + "step": 968 + }, + { + "epoch": 2.36, + "learning_rate": 2.8855667959500276e-05, + "loss": 0.1218, + "step": 969 + }, + { + "epoch": 2.36, + "learning_rate": 2.8816232105471863e-05, + "loss": 0.1257, + "step": 970 + }, + { + "epoch": 2.36, + "learning_rate": 2.8776786526563575e-05, + "loss": 0.1216, + "step": 971 + }, + { + "epoch": 2.37, + "learning_rate": 2.8737331323294314e-05, + "loss": 0.1285, + "step": 972 + }, + { + "epoch": 2.37, + "learning_rate": 2.8697866596207524e-05, + "loss": 0.1285, + "step": 973 + }, + { + "epoch": 2.37, + "learning_rate": 2.8658392445870928e-05, + "loss": 0.1286, + "step": 974 + }, + { + "epoch": 2.37, + "learning_rate": 2.8618908972876246e-05, + "loss": 0.1267, + "step": 975 + }, + { + "epoch": 2.38, + "learning_rate": 2.857941627783895e-05, + "loss": 0.1182, + "step": 976 + }, + { + "epoch": 2.38, + "learning_rate": 2.8539914461398043e-05, + "loss": 0.1193, + "step": 977 + }, + { + "epoch": 2.38, + "learning_rate": 2.8500403624215734e-05, + "loss": 0.1157, + "step": 978 + }, + { + "epoch": 2.38, + "learning_rate": 2.846088386697723e-05, + "loss": 0.1269, + "step": 979 + }, + { + "epoch": 2.39, + "learning_rate": 2.8421355290390506e-05, + "loss": 0.1251, + "step": 980 + }, + { + "epoch": 2.39, + "learning_rate": 2.838181799518595e-05, + "loss": 0.1176, + "step": 981 + }, + { + "epoch": 2.39, + "learning_rate": 2.834227208211621e-05, + "loss": 0.1238, + "step": 982 + }, + { + "epoch": 2.39, + "learning_rate": 2.8302717651955895e-05, + "loss": 0.1226, + "step": 983 + }, + { + "epoch": 2.39, + "learning_rate": 2.8263154805501297e-05, + "loss": 0.1294, + "step": 984 + }, + { + "epoch": 2.4, + "learning_rate": 2.822358364357015e-05, + "loss": 0.1201, + "step": 985 + }, + { + "epoch": 2.4, + "learning_rate": 2.8184004267001425e-05, + "loss": 0.1255, + "step": 986 + }, + { + "epoch": 2.4, + "learning_rate": 2.8144416776654963e-05, + "loss": 0.1228, + "step": 987 + }, + { + "epoch": 2.4, + "learning_rate": 2.810482127341133e-05, + "loss": 0.1222, + "step": 988 + }, + { + "epoch": 2.41, + "learning_rate": 2.8065217858171495e-05, + "loss": 0.118, + "step": 989 + }, + { + "epoch": 2.41, + "learning_rate": 2.8025606631856578e-05, + "loss": 0.1261, + "step": 990 + }, + { + "epoch": 2.41, + "learning_rate": 2.7985987695407616e-05, + "loss": 0.1333, + "step": 991 + }, + { + "epoch": 2.41, + "learning_rate": 2.7946361149785306e-05, + "loss": 0.1165, + "step": 992 + }, + { + "epoch": 2.42, + "learning_rate": 2.79067270959697e-05, + "loss": 0.1295, + "step": 993 + }, + { + "epoch": 2.42, + "learning_rate": 2.7867085634960016e-05, + "loss": 0.125, + "step": 994 + }, + { + "epoch": 2.42, + "learning_rate": 2.782743686777433e-05, + "loss": 0.1143, + "step": 995 + }, + { + "epoch": 2.42, + "learning_rate": 2.778778089544935e-05, + "loss": 0.1247, + "step": 996 + }, + { + "epoch": 2.43, + "learning_rate": 2.7748117819040127e-05, + "loss": 0.1219, + "step": 997 + }, + { + "epoch": 2.43, + "learning_rate": 2.770844773961983e-05, + "loss": 0.1319, + "step": 998 + }, + { + "epoch": 2.43, + "learning_rate": 2.7668770758279473e-05, + "loss": 0.1291, + "step": 999 + }, + { + "epoch": 2.43, + "learning_rate": 2.762908697612765e-05, + "loss": 0.118, + "step": 1000 + }, + { + "epoch": 2.44, + "learning_rate": 2.7589396494290287e-05, + "loss": 0.134, + "step": 1001 + }, + { + "epoch": 2.44, + "learning_rate": 2.7549699413910384e-05, + "loss": 0.1295, + "step": 1002 + }, + { + "epoch": 2.44, + "learning_rate": 2.7509995836147766e-05, + "loss": 0.1227, + "step": 1003 + }, + { + "epoch": 2.44, + "learning_rate": 2.74702858621788e-05, + "loss": 0.118, + "step": 1004 + }, + { + "epoch": 2.45, + "learning_rate": 2.743056959319616e-05, + "loss": 0.1226, + "step": 1005 + }, + { + "epoch": 2.45, + "learning_rate": 2.739084713040856e-05, + "loss": 0.1257, + "step": 1006 + }, + { + "epoch": 2.45, + "learning_rate": 2.7351118575040496e-05, + "loss": 0.1215, + "step": 1007 + }, + { + "epoch": 2.45, + "learning_rate": 2.7311384028332e-05, + "loss": 0.1232, + "step": 1008 + }, + { + "epoch": 2.46, + "learning_rate": 2.7271643591538353e-05, + "loss": 0.1208, + "step": 1009 + }, + { + "epoch": 2.46, + "learning_rate": 2.723189736592986e-05, + "loss": 0.1248, + "step": 1010 + }, + { + "epoch": 2.46, + "learning_rate": 2.719214545279158e-05, + "loss": 0.119, + "step": 1011 + }, + { + "epoch": 2.46, + "learning_rate": 2.715238795342305e-05, + "loss": 0.1213, + "step": 1012 + }, + { + "epoch": 2.47, + "learning_rate": 2.711262496913805e-05, + "loss": 0.122, + "step": 1013 + }, + { + "epoch": 2.47, + "learning_rate": 2.7072856601264345e-05, + "loss": 0.1218, + "step": 1014 + }, + { + "epoch": 2.47, + "learning_rate": 2.7033082951143418e-05, + "loss": 0.1178, + "step": 1015 + }, + { + "epoch": 2.47, + "learning_rate": 2.6993304120130196e-05, + "loss": 0.127, + "step": 1016 + }, + { + "epoch": 2.48, + "learning_rate": 2.6953520209592824e-05, + "loss": 0.1145, + "step": 1017 + }, + { + "epoch": 2.48, + "learning_rate": 2.69137313209124e-05, + "loss": 0.1256, + "step": 1018 + }, + { + "epoch": 2.48, + "learning_rate": 2.6873937555482663e-05, + "loss": 0.1305, + "step": 1019 + }, + { + "epoch": 2.48, + "learning_rate": 2.6834139014709843e-05, + "loss": 0.1268, + "step": 1020 + }, + { + "epoch": 2.48, + "learning_rate": 2.6794335800012293e-05, + "loss": 0.1235, + "step": 1021 + }, + { + "epoch": 2.49, + "learning_rate": 2.6754528012820283e-05, + "loss": 0.125, + "step": 1022 + }, + { + "epoch": 2.49, + "learning_rate": 2.671471575457576e-05, + "loss": 0.1309, + "step": 1023 + }, + { + "epoch": 2.49, + "learning_rate": 2.6674899126732045e-05, + "loss": 0.115, + "step": 1024 + }, + { + "epoch": 2.49, + "learning_rate": 2.663507823075358e-05, + "loss": 0.1269, + "step": 1025 + }, + { + "epoch": 2.5, + "learning_rate": 2.659525316811571e-05, + "loss": 0.1276, + "step": 1026 + }, + { + "epoch": 2.5, + "learning_rate": 2.6555424040304398e-05, + "loss": 0.1118, + "step": 1027 + }, + { + "epoch": 2.5, + "learning_rate": 2.6515590948815933e-05, + "loss": 0.1252, + "step": 1028 + }, + { + "epoch": 2.5, + "learning_rate": 2.6475753995156743e-05, + "loss": 0.1184, + "step": 1029 + }, + { + "epoch": 2.51, + "learning_rate": 2.643591328084309e-05, + "loss": 0.1217, + "step": 1030 + }, + { + "epoch": 2.51, + "learning_rate": 2.6396068907400784e-05, + "loss": 0.1271, + "step": 1031 + }, + { + "epoch": 2.51, + "learning_rate": 2.635622097636501e-05, + "loss": 0.1175, + "step": 1032 + }, + { + "epoch": 2.51, + "learning_rate": 2.6316369589279998e-05, + "loss": 0.1184, + "step": 1033 + }, + { + "epoch": 2.52, + "learning_rate": 2.6276514847698762e-05, + "loss": 0.1197, + "step": 1034 + }, + { + "epoch": 2.52, + "learning_rate": 2.623665685318291e-05, + "loss": 0.1269, + "step": 1035 + }, + { + "epoch": 2.52, + "learning_rate": 2.6196795707302302e-05, + "loss": 0.1257, + "step": 1036 + }, + { + "epoch": 2.52, + "learning_rate": 2.6156931511634834e-05, + "loss": 0.1276, + "step": 1037 + }, + { + "epoch": 2.53, + "learning_rate": 2.6117064367766197e-05, + "loss": 0.1322, + "step": 1038 + }, + { + "epoch": 2.53, + "learning_rate": 2.607719437728957e-05, + "loss": 0.1239, + "step": 1039 + }, + { + "epoch": 2.53, + "learning_rate": 2.603732164180539e-05, + "loss": 0.1169, + "step": 1040 + }, + { + "epoch": 2.53, + "learning_rate": 2.5997446262921106e-05, + "loss": 0.1144, + "step": 1041 + }, + { + "epoch": 2.54, + "learning_rate": 2.595756834225089e-05, + "loss": 0.1223, + "step": 1042 + }, + { + "epoch": 2.54, + "learning_rate": 2.5917687981415373e-05, + "loss": 0.1238, + "step": 1043 + }, + { + "epoch": 2.54, + "learning_rate": 2.5877805282041455e-05, + "loss": 0.1201, + "step": 1044 + }, + { + "epoch": 2.54, + "learning_rate": 2.583792034576194e-05, + "loss": 0.1327, + "step": 1045 + }, + { + "epoch": 2.55, + "learning_rate": 2.579803327421536e-05, + "loss": 0.1336, + "step": 1046 + }, + { + "epoch": 2.55, + "learning_rate": 2.575814416904569e-05, + "loss": 0.1177, + "step": 1047 + }, + { + "epoch": 2.55, + "learning_rate": 2.571825313190208e-05, + "loss": 0.1153, + "step": 1048 + }, + { + "epoch": 2.55, + "learning_rate": 2.5678360264438606e-05, + "loss": 0.1199, + "step": 1049 + }, + { + "epoch": 2.56, + "learning_rate": 2.5638465668314006e-05, + "loss": 0.1191, + "step": 1050 + }, + { + "epoch": 2.56, + "learning_rate": 2.5598569445191418e-05, + "loss": 0.1132, + "step": 1051 + }, + { + "epoch": 2.56, + "learning_rate": 2.5558671696738146e-05, + "loss": 0.1266, + "step": 1052 + }, + { + "epoch": 2.56, + "learning_rate": 2.5518772524625357e-05, + "loss": 0.1191, + "step": 1053 + }, + { + "epoch": 2.57, + "learning_rate": 2.5478872030527855e-05, + "loss": 0.1148, + "step": 1054 + }, + { + "epoch": 2.57, + "learning_rate": 2.5438970316123822e-05, + "loss": 0.1224, + "step": 1055 + }, + { + "epoch": 2.57, + "learning_rate": 2.539906748309454e-05, + "loss": 0.1136, + "step": 1056 + }, + { + "epoch": 2.57, + "learning_rate": 2.535916363312414e-05, + "loss": 0.1199, + "step": 1057 + }, + { + "epoch": 2.57, + "learning_rate": 2.5319258867899348e-05, + "loss": 0.1241, + "step": 1058 + }, + { + "epoch": 2.58, + "learning_rate": 2.5279353289109227e-05, + "loss": 0.1202, + "step": 1059 + }, + { + "epoch": 2.58, + "learning_rate": 2.5239446998444898e-05, + "loss": 0.1247, + "step": 1060 + }, + { + "epoch": 2.58, + "learning_rate": 2.5199540097599318e-05, + "loss": 0.1345, + "step": 1061 + }, + { + "epoch": 2.58, + "learning_rate": 2.5159632688266982e-05, + "loss": 0.1223, + "step": 1062 + }, + { + "epoch": 2.59, + "learning_rate": 2.511972487214369e-05, + "loss": 0.1141, + "step": 1063 + }, + { + "epoch": 2.59, + "learning_rate": 2.5079816750926265e-05, + "loss": 0.1257, + "step": 1064 + }, + { + "epoch": 2.59, + "learning_rate": 2.5039908426312332e-05, + "loss": 0.1235, + "step": 1065 + }, + { + "epoch": 2.59, + "learning_rate": 2.5e-05, + "loss": 0.1297, + "step": 1066 + }, + { + "epoch": 2.6, + "learning_rate": 2.4960091573687677e-05, + "loss": 0.1281, + "step": 1067 + }, + { + "epoch": 2.6, + "learning_rate": 2.4920183249073744e-05, + "loss": 0.1176, + "step": 1068 + }, + { + "epoch": 2.6, + "learning_rate": 2.488027512785632e-05, + "loss": 0.1204, + "step": 1069 + }, + { + "epoch": 2.6, + "learning_rate": 2.4840367311733024e-05, + "loss": 0.1318, + "step": 1070 + }, + { + "epoch": 2.61, + "learning_rate": 2.4800459902400684e-05, + "loss": 0.1293, + "step": 1071 + }, + { + "epoch": 2.61, + "learning_rate": 2.4760553001555108e-05, + "loss": 0.1154, + "step": 1072 + }, + { + "epoch": 2.61, + "learning_rate": 2.472064671089078e-05, + "loss": 0.1178, + "step": 1073 + }, + { + "epoch": 2.61, + "learning_rate": 2.468074113210066e-05, + "loss": 0.125, + "step": 1074 + }, + { + "epoch": 2.62, + "learning_rate": 2.4640836366875873e-05, + "loss": 0.1191, + "step": 1075 + }, + { + "epoch": 2.62, + "learning_rate": 2.4600932516905466e-05, + "loss": 0.1264, + "step": 1076 + }, + { + "epoch": 2.62, + "learning_rate": 2.4561029683876184e-05, + "loss": 0.1207, + "step": 1077 + }, + { + "epoch": 2.62, + "learning_rate": 2.4521127969472148e-05, + "loss": 0.1253, + "step": 1078 + }, + { + "epoch": 2.63, + "learning_rate": 2.4481227475374652e-05, + "loss": 0.1255, + "step": 1079 + }, + { + "epoch": 2.63, + "learning_rate": 2.4441328303261867e-05, + "loss": 0.1287, + "step": 1080 + }, + { + "epoch": 2.63, + "learning_rate": 2.440143055480859e-05, + "loss": 0.1176, + "step": 1081 + }, + { + "epoch": 2.63, + "learning_rate": 2.4361534331686003e-05, + "loss": 0.1223, + "step": 1082 + }, + { + "epoch": 2.64, + "learning_rate": 2.4321639735561403e-05, + "loss": 0.1321, + "step": 1083 + }, + { + "epoch": 2.64, + "learning_rate": 2.4281746868097926e-05, + "loss": 0.1268, + "step": 1084 + }, + { + "epoch": 2.64, + "learning_rate": 2.4241855830954316e-05, + "loss": 0.1229, + "step": 1085 + }, + { + "epoch": 2.64, + "learning_rate": 2.420196672578465e-05, + "loss": 0.118, + "step": 1086 + }, + { + "epoch": 2.65, + "learning_rate": 2.4162079654238073e-05, + "loss": 0.135, + "step": 1087 + }, + { + "epoch": 2.65, + "learning_rate": 2.412219471795855e-05, + "loss": 0.1135, + "step": 1088 + }, + { + "epoch": 2.65, + "learning_rate": 2.4082312018584626e-05, + "loss": 0.1158, + "step": 1089 + }, + { + "epoch": 2.65, + "learning_rate": 2.4042431657749117e-05, + "loss": 0.125, + "step": 1090 + }, + { + "epoch": 2.66, + "learning_rate": 2.40025537370789e-05, + "loss": 0.1245, + "step": 1091 + }, + { + "epoch": 2.66, + "learning_rate": 2.3962678358194614e-05, + "loss": 0.1259, + "step": 1092 + }, + { + "epoch": 2.66, + "learning_rate": 2.3922805622710438e-05, + "loss": 0.1157, + "step": 1093 + }, + { + "epoch": 2.66, + "learning_rate": 2.3882935632233805e-05, + "loss": 0.1228, + "step": 1094 + }, + { + "epoch": 2.67, + "learning_rate": 2.3843068488365168e-05, + "loss": 0.1255, + "step": 1095 + }, + { + "epoch": 2.67, + "learning_rate": 2.3803204292697704e-05, + "loss": 0.1207, + "step": 1096 + }, + { + "epoch": 2.67, + "learning_rate": 2.3763343146817096e-05, + "loss": 0.1201, + "step": 1097 + }, + { + "epoch": 2.67, + "learning_rate": 2.372348515230124e-05, + "loss": 0.1203, + "step": 1098 + }, + { + "epoch": 2.67, + "learning_rate": 2.368363041072001e-05, + "loss": 0.1234, + "step": 1099 + }, + { + "epoch": 2.68, + "learning_rate": 2.364377902363499e-05, + "loss": 0.1252, + "step": 1100 + }, + { + "epoch": 2.68, + "learning_rate": 2.3603931092599215e-05, + "loss": 0.1239, + "step": 1101 + }, + { + "epoch": 2.68, + "learning_rate": 2.356408671915692e-05, + "loss": 0.1148, + "step": 1102 + }, + { + "epoch": 2.68, + "learning_rate": 2.3524246004843263e-05, + "loss": 0.12, + "step": 1103 + }, + { + "epoch": 2.69, + "learning_rate": 2.3484409051184076e-05, + "loss": 0.12, + "step": 1104 + }, + { + "epoch": 2.69, + "learning_rate": 2.3444575959695614e-05, + "loss": 0.1235, + "step": 1105 + }, + { + "epoch": 2.69, + "learning_rate": 2.340474683188429e-05, + "loss": 0.121, + "step": 1106 + }, + { + "epoch": 2.69, + "learning_rate": 2.3364921769246423e-05, + "loss": 0.1218, + "step": 1107 + }, + { + "epoch": 2.7, + "learning_rate": 2.332510087326796e-05, + "loss": 0.1246, + "step": 1108 + }, + { + "epoch": 2.7, + "learning_rate": 2.3285284245424244e-05, + "loss": 0.1243, + "step": 1109 + }, + { + "epoch": 2.7, + "learning_rate": 2.324547198717972e-05, + "loss": 0.1206, + "step": 1110 + }, + { + "epoch": 2.7, + "learning_rate": 2.3205664199987716e-05, + "loss": 0.1172, + "step": 1111 + }, + { + "epoch": 2.71, + "learning_rate": 2.316586098529017e-05, + "loss": 0.111, + "step": 1112 + }, + { + "epoch": 2.71, + "learning_rate": 2.3126062444517336e-05, + "loss": 0.1272, + "step": 1113 + }, + { + "epoch": 2.71, + "learning_rate": 2.3086268679087607e-05, + "loss": 0.1196, + "step": 1114 + }, + { + "epoch": 2.71, + "learning_rate": 2.3046479790407178e-05, + "loss": 0.126, + "step": 1115 + }, + { + "epoch": 2.72, + "learning_rate": 2.3006695879869807e-05, + "loss": 0.1232, + "step": 1116 + }, + { + "epoch": 2.72, + "learning_rate": 2.2966917048856588e-05, + "loss": 0.115, + "step": 1117 + }, + { + "epoch": 2.72, + "learning_rate": 2.292714339873566e-05, + "loss": 0.1258, + "step": 1118 + }, + { + "epoch": 2.72, + "learning_rate": 2.288737503086195e-05, + "loss": 0.1246, + "step": 1119 + }, + { + "epoch": 2.73, + "learning_rate": 2.284761204657696e-05, + "loss": 0.1262, + "step": 1120 + }, + { + "epoch": 2.73, + "learning_rate": 2.280785454720843e-05, + "loss": 0.1126, + "step": 1121 + }, + { + "epoch": 2.73, + "learning_rate": 2.2768102634070147e-05, + "loss": 0.1244, + "step": 1122 + }, + { + "epoch": 2.73, + "learning_rate": 2.2728356408461653e-05, + "loss": 0.1271, + "step": 1123 + }, + { + "epoch": 2.74, + "learning_rate": 2.268861597166801e-05, + "loss": 0.1243, + "step": 1124 + }, + { + "epoch": 2.74, + "learning_rate": 2.26488814249595e-05, + "loss": 0.1275, + "step": 1125 + }, + { + "epoch": 2.74, + "learning_rate": 2.2609152869591446e-05, + "loss": 0.1219, + "step": 1126 + }, + { + "epoch": 2.74, + "learning_rate": 2.2569430406803846e-05, + "loss": 0.1455, + "step": 1127 + }, + { + "epoch": 2.75, + "learning_rate": 2.2529714137821206e-05, + "loss": 0.12, + "step": 1128 + }, + { + "epoch": 2.75, + "learning_rate": 2.249000416385224e-05, + "loss": 0.1142, + "step": 1129 + }, + { + "epoch": 2.75, + "learning_rate": 2.2450300586089622e-05, + "loss": 0.1285, + "step": 1130 + }, + { + "epoch": 2.75, + "learning_rate": 2.2410603505709715e-05, + "loss": 0.1238, + "step": 1131 + }, + { + "epoch": 2.76, + "learning_rate": 2.2370913023872355e-05, + "loss": 0.1238, + "step": 1132 + }, + { + "epoch": 2.76, + "learning_rate": 2.233122924172053e-05, + "loss": 0.1251, + "step": 1133 + }, + { + "epoch": 2.76, + "learning_rate": 2.229155226038017e-05, + "loss": 0.1346, + "step": 1134 + }, + { + "epoch": 2.76, + "learning_rate": 2.2251882180959875e-05, + "loss": 0.1255, + "step": 1135 + }, + { + "epoch": 2.76, + "learning_rate": 2.2212219104550665e-05, + "loss": 0.1289, + "step": 1136 + }, + { + "epoch": 2.77, + "learning_rate": 2.217256313222567e-05, + "loss": 0.1172, + "step": 1137 + }, + { + "epoch": 2.77, + "learning_rate": 2.2132914365039993e-05, + "loss": 0.1186, + "step": 1138 + }, + { + "epoch": 2.77, + "learning_rate": 2.2093272904030307e-05, + "loss": 0.1089, + "step": 1139 + }, + { + "epoch": 2.77, + "learning_rate": 2.2053638850214704e-05, + "loss": 0.1273, + "step": 1140 + }, + { + "epoch": 2.78, + "learning_rate": 2.201401230459239e-05, + "loss": 0.1231, + "step": 1141 + }, + { + "epoch": 2.78, + "learning_rate": 2.197439336814343e-05, + "loss": 0.1146, + "step": 1142 + }, + { + "epoch": 2.78, + "learning_rate": 2.1934782141828504e-05, + "loss": 0.1198, + "step": 1143 + }, + { + "epoch": 2.78, + "learning_rate": 2.1895178726588674e-05, + "loss": 0.1205, + "step": 1144 + }, + { + "epoch": 2.79, + "learning_rate": 2.185558322334504e-05, + "loss": 0.1223, + "step": 1145 + }, + { + "epoch": 2.79, + "learning_rate": 2.1815995732998584e-05, + "loss": 0.1212, + "step": 1146 + }, + { + "epoch": 2.79, + "learning_rate": 2.1776416356429856e-05, + "loss": 0.122, + "step": 1147 + }, + { + "epoch": 2.79, + "learning_rate": 2.173684519449872e-05, + "loss": 0.1261, + "step": 1148 + }, + { + "epoch": 2.8, + "learning_rate": 2.169728234804411e-05, + "loss": 0.1241, + "step": 1149 + }, + { + "epoch": 2.8, + "learning_rate": 2.165772791788379e-05, + "loss": 0.1185, + "step": 1150 + }, + { + "epoch": 2.8, + "learning_rate": 2.1618182004814054e-05, + "loss": 0.1188, + "step": 1151 + }, + { + "epoch": 2.8, + "learning_rate": 2.1578644709609503e-05, + "loss": 0.1063, + "step": 1152 + }, + { + "epoch": 2.81, + "learning_rate": 2.1539116133022773e-05, + "loss": 0.1121, + "step": 1153 + }, + { + "epoch": 2.81, + "learning_rate": 2.1499596375784282e-05, + "loss": 0.1195, + "step": 1154 + }, + { + "epoch": 2.81, + "learning_rate": 2.146008553860197e-05, + "loss": 0.1131, + "step": 1155 + }, + { + "epoch": 2.81, + "learning_rate": 2.142058372216105e-05, + "loss": 0.1156, + "step": 1156 + }, + { + "epoch": 2.82, + "learning_rate": 2.138109102712376e-05, + "loss": 0.1224, + "step": 1157 + }, + { + "epoch": 2.82, + "learning_rate": 2.1341607554129074e-05, + "loss": 0.1217, + "step": 1158 + }, + { + "epoch": 2.82, + "learning_rate": 2.1302133403792482e-05, + "loss": 0.1213, + "step": 1159 + }, + { + "epoch": 2.82, + "learning_rate": 2.1262668676705695e-05, + "loss": 0.1266, + "step": 1160 + }, + { + "epoch": 2.83, + "learning_rate": 2.1223213473436438e-05, + "loss": 0.1141, + "step": 1161 + }, + { + "epoch": 2.83, + "learning_rate": 2.1183767894528136e-05, + "loss": 0.1194, + "step": 1162 + }, + { + "epoch": 2.83, + "learning_rate": 2.1144332040499726e-05, + "loss": 0.1157, + "step": 1163 + }, + { + "epoch": 2.83, + "learning_rate": 2.1104906011845334e-05, + "loss": 0.1156, + "step": 1164 + }, + { + "epoch": 2.84, + "learning_rate": 2.1065489909034065e-05, + "loss": 0.1224, + "step": 1165 + }, + { + "epoch": 2.84, + "learning_rate": 2.1026083832509702e-05, + "loss": 0.1194, + "step": 1166 + }, + { + "epoch": 2.84, + "learning_rate": 2.0986687882690515e-05, + "loss": 0.11, + "step": 1167 + }, + { + "epoch": 2.84, + "learning_rate": 2.094730215996894e-05, + "loss": 0.115, + "step": 1168 + }, + { + "epoch": 2.85, + "learning_rate": 2.090792676471137e-05, + "loss": 0.1267, + "step": 1169 + }, + { + "epoch": 2.85, + "learning_rate": 2.0868561797257878e-05, + "loss": 0.131, + "step": 1170 + }, + { + "epoch": 2.85, + "learning_rate": 2.082920735792195e-05, + "loss": 0.123, + "step": 1171 + }, + { + "epoch": 2.85, + "learning_rate": 2.0789863546990253e-05, + "loss": 0.123, + "step": 1172 + }, + { + "epoch": 2.85, + "learning_rate": 2.0750530464722373e-05, + "loss": 0.126, + "step": 1173 + }, + { + "epoch": 2.86, + "learning_rate": 2.071120821135054e-05, + "loss": 0.1151, + "step": 1174 + }, + { + "epoch": 2.86, + "learning_rate": 2.0671896887079418e-05, + "loss": 0.1242, + "step": 1175 + }, + { + "epoch": 2.86, + "learning_rate": 2.0632596592085804e-05, + "loss": 0.1138, + "step": 1176 + }, + { + "epoch": 2.86, + "learning_rate": 2.0593307426518373e-05, + "loss": 0.1184, + "step": 1177 + }, + { + "epoch": 2.87, + "learning_rate": 2.0554029490497463e-05, + "loss": 0.1216, + "step": 1178 + }, + { + "epoch": 2.87, + "learning_rate": 2.0514762884114784e-05, + "loss": 0.1114, + "step": 1179 + }, + { + "epoch": 2.87, + "learning_rate": 2.047550770743318e-05, + "loss": 0.1239, + "step": 1180 + }, + { + "epoch": 2.87, + "learning_rate": 2.0436264060486366e-05, + "loss": 0.122, + "step": 1181 + }, + { + "epoch": 2.88, + "learning_rate": 2.0397032043278687e-05, + "loss": 0.1221, + "step": 1182 + }, + { + "epoch": 2.88, + "learning_rate": 2.035781175578483e-05, + "loss": 0.1218, + "step": 1183 + }, + { + "epoch": 2.88, + "learning_rate": 2.03186032979496e-05, + "loss": 0.1251, + "step": 1184 + }, + { + "epoch": 2.88, + "learning_rate": 2.0279406769687666e-05, + "loss": 0.1135, + "step": 1185 + }, + { + "epoch": 2.89, + "learning_rate": 2.0240222270883288e-05, + "loss": 0.1229, + "step": 1186 + }, + { + "epoch": 2.89, + "learning_rate": 2.020104990139008e-05, + "loss": 0.1183, + "step": 1187 + }, + { + "epoch": 2.89, + "learning_rate": 2.016188976103074e-05, + "loss": 0.1207, + "step": 1188 + }, + { + "epoch": 2.89, + "learning_rate": 2.0122741949596797e-05, + "loss": 0.1142, + "step": 1189 + }, + { + "epoch": 2.9, + "learning_rate": 2.008360656684837e-05, + "loss": 0.1243, + "step": 1190 + }, + { + "epoch": 2.9, + "learning_rate": 2.0044483712513908e-05, + "loss": 0.1127, + "step": 1191 + }, + { + "epoch": 2.9, + "learning_rate": 2.000537348628993e-05, + "loss": 0.113, + "step": 1192 + }, + { + "epoch": 2.9, + "learning_rate": 1.9966275987840764e-05, + "loss": 0.1221, + "step": 1193 + }, + { + "epoch": 2.91, + "learning_rate": 1.9927191316798332e-05, + "loss": 0.121, + "step": 1194 + }, + { + "epoch": 2.91, + "learning_rate": 1.9888119572761845e-05, + "loss": 0.1184, + "step": 1195 + }, + { + "epoch": 2.91, + "learning_rate": 1.984906085529758e-05, + "loss": 0.1143, + "step": 1196 + }, + { + "epoch": 2.91, + "learning_rate": 1.9810015263938624e-05, + "loss": 0.1155, + "step": 1197 + }, + { + "epoch": 2.92, + "learning_rate": 1.977098289818459e-05, + "loss": 0.1211, + "step": 1198 + }, + { + "epoch": 2.92, + "learning_rate": 1.973196385750141e-05, + "loss": 0.1397, + "step": 1199 + }, + { + "epoch": 2.92, + "learning_rate": 1.969295824132107e-05, + "loss": 0.1072, + "step": 1200 + }, + { + "epoch": 2.92, + "learning_rate": 1.965396614904132e-05, + "loss": 0.1223, + "step": 1201 + }, + { + "epoch": 2.93, + "learning_rate": 1.961498768002547e-05, + "loss": 0.1206, + "step": 1202 + }, + { + "epoch": 2.93, + "learning_rate": 1.9576022933602097e-05, + "loss": 0.1168, + "step": 1203 + }, + { + "epoch": 2.93, + "learning_rate": 1.9537072009064814e-05, + "loss": 0.116, + "step": 1204 + }, + { + "epoch": 2.93, + "learning_rate": 1.949813500567201e-05, + "loss": 0.1186, + "step": 1205 + }, + { + "epoch": 2.94, + "learning_rate": 1.9459212022646606e-05, + "loss": 0.1121, + "step": 1206 + }, + { + "epoch": 2.94, + "learning_rate": 1.9420303159175796e-05, + "loss": 0.1251, + "step": 1207 + }, + { + "epoch": 2.94, + "learning_rate": 1.9381408514410782e-05, + "loss": 0.1256, + "step": 1208 + }, + { + "epoch": 2.94, + "learning_rate": 1.9342528187466548e-05, + "loss": 0.1354, + "step": 1209 + }, + { + "epoch": 2.94, + "learning_rate": 1.9303662277421568e-05, + "loss": 0.1258, + "step": 1210 + }, + { + "epoch": 2.95, + "learning_rate": 1.9264810883317592e-05, + "loss": 0.1149, + "step": 1211 + }, + { + "epoch": 2.95, + "learning_rate": 1.922597410415939e-05, + "loss": 0.1202, + "step": 1212 + }, + { + "epoch": 2.95, + "learning_rate": 1.918715203891448e-05, + "loss": 0.1244, + "step": 1213 + }, + { + "epoch": 2.95, + "learning_rate": 1.9148344786512878e-05, + "loss": 0.1198, + "step": 1214 + }, + { + "epoch": 2.96, + "learning_rate": 1.9109552445846854e-05, + "loss": 0.1153, + "step": 1215 + }, + { + "epoch": 2.96, + "learning_rate": 1.907077511577068e-05, + "loss": 0.1194, + "step": 1216 + }, + { + "epoch": 2.96, + "learning_rate": 1.9032012895100383e-05, + "loss": 0.1181, + "step": 1217 + }, + { + "epoch": 2.96, + "learning_rate": 1.8993265882613482e-05, + "loss": 0.1173, + "step": 1218 + }, + { + "epoch": 2.97, + "learning_rate": 1.8954534177048744e-05, + "loss": 0.1196, + "step": 1219 + }, + { + "epoch": 2.97, + "learning_rate": 1.8915817877105926e-05, + "loss": 0.1218, + "step": 1220 + }, + { + "epoch": 2.97, + "learning_rate": 1.8877117081445524e-05, + "loss": 0.117, + "step": 1221 + }, + { + "epoch": 2.97, + "learning_rate": 1.8838431888688527e-05, + "loss": 0.1167, + "step": 1222 + }, + { + "epoch": 2.98, + "learning_rate": 1.8799762397416158e-05, + "loss": 0.1194, + "step": 1223 + }, + { + "epoch": 2.98, + "learning_rate": 1.8761108706169655e-05, + "loss": 0.1177, + "step": 1224 + }, + { + "epoch": 2.98, + "learning_rate": 1.872247091344996e-05, + "loss": 0.1223, + "step": 1225 + }, + { + "epoch": 2.98, + "learning_rate": 1.8683849117717518e-05, + "loss": 0.1231, + "step": 1226 + }, + { + "epoch": 2.99, + "learning_rate": 1.8645243417391995e-05, + "loss": 0.1212, + "step": 1227 + }, + { + "epoch": 2.99, + "learning_rate": 1.8606653910852056e-05, + "loss": 0.1163, + "step": 1228 + }, + { + "epoch": 2.99, + "learning_rate": 1.856808069643509e-05, + "loss": 0.1265, + "step": 1229 + }, + { + "epoch": 2.99, + "learning_rate": 1.852952387243698e-05, + "loss": 0.1148, + "step": 1230 + }, + { + "epoch": 3.0, + "learning_rate": 1.849098353711183e-05, + "loss": 0.12, + "step": 1231 + }, + { + "epoch": 3.0, + "learning_rate": 1.8452459788671738e-05, + "loss": 0.1195, + "step": 1232 + }, + { + "epoch": 3.0, + "eval_loss": 0.7090210318565369, + "eval_runtime": 116.3629, + "eval_samples_per_second": 6.548, + "eval_steps_per_second": 0.413, + "step": 1232 + }, + { + "epoch": 3.0, + "learning_rate": 1.841395272528651e-05, + "loss": 0.0877, + "step": 1233 + }, + { + "epoch": 3.0, + "learning_rate": 1.8375462445083464e-05, + "loss": 0.0432, + "step": 1234 + }, + { + "epoch": 3.01, + "learning_rate": 1.8336989046147128e-05, + "loss": 0.0427, + "step": 1235 + }, + { + "epoch": 3.01, + "learning_rate": 1.8298532626519007e-05, + "loss": 0.0441, + "step": 1236 + }, + { + "epoch": 3.01, + "learning_rate": 1.826009328419737e-05, + "loss": 0.0398, + "step": 1237 + }, + { + "epoch": 3.01, + "learning_rate": 1.822167111713695e-05, + "loss": 0.0429, + "step": 1238 + }, + { + "epoch": 3.02, + "learning_rate": 1.818326622324869e-05, + "loss": 0.0366, + "step": 1239 + }, + { + "epoch": 3.02, + "learning_rate": 1.814487870039955e-05, + "loss": 0.034, + "step": 1240 + }, + { + "epoch": 3.02, + "learning_rate": 1.81065086464122e-05, + "loss": 0.0367, + "step": 1241 + }, + { + "epoch": 3.02, + "learning_rate": 1.80681561590648e-05, + "loss": 0.0336, + "step": 1242 + }, + { + "epoch": 3.03, + "learning_rate": 1.802982133609077e-05, + "loss": 0.0367, + "step": 1243 + }, + { + "epoch": 3.03, + "learning_rate": 1.7991504275178473e-05, + "loss": 0.0373, + "step": 1244 + }, + { + "epoch": 3.03, + "learning_rate": 1.7953205073971035e-05, + "loss": 0.0351, + "step": 1245 + }, + { + "epoch": 3.03, + "learning_rate": 1.7914923830066074e-05, + "loss": 0.0341, + "step": 1246 + }, + { + "epoch": 3.03, + "learning_rate": 1.7876660641015437e-05, + "loss": 0.0392, + "step": 1247 + }, + { + "epoch": 3.04, + "learning_rate": 1.7838415604324943e-05, + "loss": 0.0373, + "step": 1248 + }, + { + "epoch": 3.04, + "learning_rate": 1.7800188817454208e-05, + "loss": 0.037, + "step": 1249 + }, + { + "epoch": 3.04, + "learning_rate": 1.7761980377816287e-05, + "loss": 0.0337, + "step": 1250 + }, + { + "epoch": 3.04, + "learning_rate": 1.772379038277751e-05, + "loss": 0.0368, + "step": 1251 + }, + { + "epoch": 3.05, + "learning_rate": 1.7685618929657194e-05, + "loss": 0.0413, + "step": 1252 + }, + { + "epoch": 3.05, + "learning_rate": 1.764746611572742e-05, + "loss": 0.0331, + "step": 1253 + }, + { + "epoch": 3.05, + "learning_rate": 1.7609332038212728e-05, + "loss": 0.0329, + "step": 1254 + }, + { + "epoch": 3.05, + "learning_rate": 1.7571216794289984e-05, + "loss": 0.0317, + "step": 1255 + }, + { + "epoch": 3.06, + "learning_rate": 1.7533120481088e-05, + "loss": 0.035, + "step": 1256 + }, + { + "epoch": 3.06, + "learning_rate": 1.7495043195687368e-05, + "loss": 0.0352, + "step": 1257 + }, + { + "epoch": 3.06, + "learning_rate": 1.7456985035120193e-05, + "loss": 0.0373, + "step": 1258 + }, + { + "epoch": 3.06, + "learning_rate": 1.741894609636985e-05, + "loss": 0.0337, + "step": 1259 + }, + { + "epoch": 3.07, + "learning_rate": 1.7380926476370702e-05, + "loss": 0.0368, + "step": 1260 + }, + { + "epoch": 3.07, + "learning_rate": 1.734292627200793e-05, + "loss": 0.0369, + "step": 1261 + }, + { + "epoch": 3.07, + "learning_rate": 1.7304945580117193e-05, + "loss": 0.0316, + "step": 1262 + }, + { + "epoch": 3.07, + "learning_rate": 1.7266984497484458e-05, + "loss": 0.0343, + "step": 1263 + }, + { + "epoch": 3.08, + "learning_rate": 1.7229043120845708e-05, + "loss": 0.035, + "step": 1264 + }, + { + "epoch": 3.08, + "learning_rate": 1.7191121546886697e-05, + "loss": 0.0375, + "step": 1265 + }, + { + "epoch": 3.08, + "learning_rate": 1.7153219872242727e-05, + "loss": 0.0331, + "step": 1266 + }, + { + "epoch": 3.08, + "learning_rate": 1.711533819349842e-05, + "loss": 0.0323, + "step": 1267 + }, + { + "epoch": 3.09, + "learning_rate": 1.7077476607187397e-05, + "loss": 0.0329, + "step": 1268 + }, + { + "epoch": 3.09, + "learning_rate": 1.7039635209792105e-05, + "loss": 0.0306, + "step": 1269 + }, + { + "epoch": 3.09, + "learning_rate": 1.7001814097743528e-05, + "loss": 0.0312, + "step": 1270 + }, + { + "epoch": 3.09, + "learning_rate": 1.6964013367420966e-05, + "loss": 0.0314, + "step": 1271 + }, + { + "epoch": 3.1, + "learning_rate": 1.692623311515178e-05, + "loss": 0.0333, + "step": 1272 + }, + { + "epoch": 3.1, + "learning_rate": 1.6888473437211132e-05, + "loss": 0.0365, + "step": 1273 + }, + { + "epoch": 3.1, + "learning_rate": 1.685073442982179e-05, + "loss": 0.03, + "step": 1274 + }, + { + "epoch": 3.1, + "learning_rate": 1.6813016189153814e-05, + "loss": 0.0329, + "step": 1275 + }, + { + "epoch": 3.11, + "learning_rate": 1.6775318811324364e-05, + "loss": 0.0369, + "step": 1276 + }, + { + "epoch": 3.11, + "learning_rate": 1.6737642392397414e-05, + "loss": 0.0331, + "step": 1277 + }, + { + "epoch": 3.11, + "learning_rate": 1.6699987028383546e-05, + "loss": 0.037, + "step": 1278 + }, + { + "epoch": 3.11, + "learning_rate": 1.6662352815239678e-05, + "loss": 0.0362, + "step": 1279 + }, + { + "epoch": 3.12, + "learning_rate": 1.6624739848868854e-05, + "loss": 0.0323, + "step": 1280 + }, + { + "epoch": 3.12, + "learning_rate": 1.6587148225119935e-05, + "loss": 0.0339, + "step": 1281 + }, + { + "epoch": 3.12, + "learning_rate": 1.6549578039787436e-05, + "loss": 0.0306, + "step": 1282 + }, + { + "epoch": 3.12, + "learning_rate": 1.65120293886112e-05, + "loss": 0.0311, + "step": 1283 + }, + { + "epoch": 3.13, + "learning_rate": 1.6474502367276222e-05, + "loss": 0.0361, + "step": 1284 + }, + { + "epoch": 3.13, + "learning_rate": 1.643699707141237e-05, + "loss": 0.0342, + "step": 1285 + }, + { + "epoch": 3.13, + "learning_rate": 1.6399513596594158e-05, + "loss": 0.0307, + "step": 1286 + }, + { + "epoch": 3.13, + "learning_rate": 1.6362052038340475e-05, + "loss": 0.0322, + "step": 1287 + }, + { + "epoch": 3.13, + "learning_rate": 1.6324612492114378e-05, + "loss": 0.0339, + "step": 1288 + }, + { + "epoch": 3.14, + "learning_rate": 1.6287195053322816e-05, + "loss": 0.0331, + "step": 1289 + }, + { + "epoch": 3.14, + "learning_rate": 1.6249799817316415e-05, + "loss": 0.0387, + "step": 1290 + }, + { + "epoch": 3.14, + "learning_rate": 1.6212426879389205e-05, + "loss": 0.035, + "step": 1291 + }, + { + "epoch": 3.14, + "learning_rate": 1.617507633477842e-05, + "loss": 0.0325, + "step": 1292 + }, + { + "epoch": 3.15, + "learning_rate": 1.6137748278664215e-05, + "loss": 0.0334, + "step": 1293 + }, + { + "epoch": 3.15, + "learning_rate": 1.6100442806169422e-05, + "loss": 0.0318, + "step": 1294 + }, + { + "epoch": 3.15, + "learning_rate": 1.6063160012359345e-05, + "loss": 0.0325, + "step": 1295 + }, + { + "epoch": 3.15, + "learning_rate": 1.602589999224149e-05, + "loss": 0.0322, + "step": 1296 + }, + { + "epoch": 3.16, + "learning_rate": 1.598866284076532e-05, + "loss": 0.0326, + "step": 1297 + }, + { + "epoch": 3.16, + "learning_rate": 1.5951448652822047e-05, + "loss": 0.0341, + "step": 1298 + }, + { + "epoch": 3.16, + "learning_rate": 1.5914257523244347e-05, + "loss": 0.0321, + "step": 1299 + }, + { + "epoch": 3.16, + "learning_rate": 1.5877089546806125e-05, + "loss": 0.0318, + "step": 1300 + }, + { + "epoch": 3.17, + "learning_rate": 1.5839944818222295e-05, + "loss": 0.0323, + "step": 1301 + }, + { + "epoch": 3.17, + "learning_rate": 1.5802823432148546e-05, + "loss": 0.0355, + "step": 1302 + }, + { + "epoch": 3.17, + "learning_rate": 1.5765725483181053e-05, + "loss": 0.0349, + "step": 1303 + }, + { + "epoch": 3.17, + "learning_rate": 1.5728651065856297e-05, + "loss": 0.0316, + "step": 1304 + }, + { + "epoch": 3.18, + "learning_rate": 1.569160027465078e-05, + "loss": 0.029, + "step": 1305 + }, + { + "epoch": 3.18, + "learning_rate": 1.5654573203980784e-05, + "loss": 0.0323, + "step": 1306 + }, + { + "epoch": 3.18, + "learning_rate": 1.561756994820216e-05, + "loss": 0.0314, + "step": 1307 + }, + { + "epoch": 3.18, + "learning_rate": 1.5580590601610074e-05, + "loss": 0.0322, + "step": 1308 + }, + { + "epoch": 3.19, + "learning_rate": 1.5543635258438745e-05, + "loss": 0.0319, + "step": 1309 + }, + { + "epoch": 3.19, + "learning_rate": 1.5506704012861256e-05, + "loss": 0.0331, + "step": 1310 + }, + { + "epoch": 3.19, + "learning_rate": 1.546979695898926e-05, + "loss": 0.0312, + "step": 1311 + }, + { + "epoch": 3.19, + "learning_rate": 1.5432914190872757e-05, + "loss": 0.0378, + "step": 1312 + }, + { + "epoch": 3.2, + "learning_rate": 1.5396055802499875e-05, + "loss": 0.0304, + "step": 1313 + }, + { + "epoch": 3.2, + "learning_rate": 1.5359221887796616e-05, + "loss": 0.0327, + "step": 1314 + }, + { + "epoch": 3.2, + "learning_rate": 1.5322412540626592e-05, + "loss": 0.0338, + "step": 1315 + }, + { + "epoch": 3.2, + "learning_rate": 1.528562785479084e-05, + "loss": 0.0297, + "step": 1316 + }, + { + "epoch": 3.21, + "learning_rate": 1.5248867924027534e-05, + "loss": 0.0319, + "step": 1317 + }, + { + "epoch": 3.21, + "learning_rate": 1.5212132842011779e-05, + "loss": 0.0336, + "step": 1318 + }, + { + "epoch": 3.21, + "learning_rate": 1.5175422702355343e-05, + "loss": 0.032, + "step": 1319 + }, + { + "epoch": 3.21, + "learning_rate": 1.5138737598606448e-05, + "loss": 0.0357, + "step": 1320 + }, + { + "epoch": 3.22, + "learning_rate": 1.5102077624249497e-05, + "loss": 0.0327, + "step": 1321 + }, + { + "epoch": 3.22, + "learning_rate": 1.506544287270487e-05, + "loss": 0.0356, + "step": 1322 + }, + { + "epoch": 3.22, + "learning_rate": 1.5028833437328682e-05, + "loss": 0.0289, + "step": 1323 + }, + { + "epoch": 3.22, + "learning_rate": 1.4992249411412513e-05, + "loss": 0.0334, + "step": 1324 + }, + { + "epoch": 3.22, + "learning_rate": 1.4955690888183205e-05, + "loss": 0.0309, + "step": 1325 + }, + { + "epoch": 3.23, + "learning_rate": 1.4919157960802618e-05, + "loss": 0.0363, + "step": 1326 + }, + { + "epoch": 3.23, + "learning_rate": 1.4882650722367364e-05, + "loss": 0.0311, + "step": 1327 + }, + { + "epoch": 3.23, + "learning_rate": 1.4846169265908603e-05, + "loss": 0.0286, + "step": 1328 + }, + { + "epoch": 3.23, + "learning_rate": 1.4809713684391807e-05, + "loss": 0.0344, + "step": 1329 + }, + { + "epoch": 3.24, + "learning_rate": 1.4773284070716503e-05, + "loss": 0.0339, + "step": 1330 + }, + { + "epoch": 3.24, + "learning_rate": 1.4736880517716039e-05, + "loss": 0.0309, + "step": 1331 + }, + { + "epoch": 3.24, + "learning_rate": 1.470050311815736e-05, + "loss": 0.0345, + "step": 1332 + }, + { + "epoch": 3.24, + "learning_rate": 1.4664151964740752e-05, + "loss": 0.0308, + "step": 1333 + }, + { + "epoch": 3.25, + "learning_rate": 1.4627827150099627e-05, + "loss": 0.0331, + "step": 1334 + }, + { + "epoch": 3.25, + "learning_rate": 1.4591528766800283e-05, + "loss": 0.0328, + "step": 1335 + }, + { + "epoch": 3.25, + "learning_rate": 1.4555256907341667e-05, + "loss": 0.0301, + "step": 1336 + }, + { + "epoch": 3.25, + "learning_rate": 1.4519011664155118e-05, + "loss": 0.0303, + "step": 1337 + }, + { + "epoch": 3.26, + "learning_rate": 1.4482793129604148e-05, + "loss": 0.0367, + "step": 1338 + }, + { + "epoch": 3.26, + "learning_rate": 1.4446601395984233e-05, + "loss": 0.0353, + "step": 1339 + }, + { + "epoch": 3.26, + "learning_rate": 1.4410436555522522e-05, + "loss": 0.0292, + "step": 1340 + }, + { + "epoch": 3.26, + "learning_rate": 1.4374298700377665e-05, + "loss": 0.0312, + "step": 1341 + }, + { + "epoch": 3.27, + "learning_rate": 1.4338187922639507e-05, + "loss": 0.0334, + "step": 1342 + }, + { + "epoch": 3.27, + "learning_rate": 1.4302104314328935e-05, + "loss": 0.0326, + "step": 1343 + }, + { + "epoch": 3.27, + "learning_rate": 1.426604796739755e-05, + "loss": 0.032, + "step": 1344 + }, + { + "epoch": 3.27, + "learning_rate": 1.4230018973727535e-05, + "loss": 0.0307, + "step": 1345 + }, + { + "epoch": 3.28, + "learning_rate": 1.4194017425131323e-05, + "loss": 0.03, + "step": 1346 + }, + { + "epoch": 3.28, + "learning_rate": 1.4158043413351455e-05, + "loss": 0.0322, + "step": 1347 + }, + { + "epoch": 3.28, + "learning_rate": 1.4122097030060249e-05, + "loss": 0.0335, + "step": 1348 + }, + { + "epoch": 3.28, + "learning_rate": 1.408617836685967e-05, + "loss": 0.0306, + "step": 1349 + }, + { + "epoch": 3.29, + "learning_rate": 1.405028751528099e-05, + "loss": 0.0299, + "step": 1350 + }, + { + "epoch": 3.29, + "learning_rate": 1.4014424566784661e-05, + "loss": 0.0294, + "step": 1351 + }, + { + "epoch": 3.29, + "learning_rate": 1.397858961275999e-05, + "loss": 0.0326, + "step": 1352 + }, + { + "epoch": 3.29, + "learning_rate": 1.3942782744524973e-05, + "loss": 0.0332, + "step": 1353 + }, + { + "epoch": 3.3, + "learning_rate": 1.3907004053326006e-05, + "loss": 0.0316, + "step": 1354 + }, + { + "epoch": 3.3, + "learning_rate": 1.3871253630337722e-05, + "loss": 0.0325, + "step": 1355 + }, + { + "epoch": 3.3, + "learning_rate": 1.3835531566662673e-05, + "loss": 0.0337, + "step": 1356 + }, + { + "epoch": 3.3, + "learning_rate": 1.379983795333119e-05, + "loss": 0.0306, + "step": 1357 + }, + { + "epoch": 3.31, + "learning_rate": 1.3764172881301062e-05, + "loss": 0.0366, + "step": 1358 + }, + { + "epoch": 3.31, + "learning_rate": 1.3728536441457357e-05, + "loss": 0.0309, + "step": 1359 + }, + { + "epoch": 3.31, + "learning_rate": 1.3692928724612203e-05, + "loss": 0.0266, + "step": 1360 + }, + { + "epoch": 3.31, + "learning_rate": 1.3657349821504517e-05, + "loss": 0.0304, + "step": 1361 + }, + { + "epoch": 3.31, + "learning_rate": 1.3621799822799788e-05, + "loss": 0.0308, + "step": 1362 + }, + { + "epoch": 3.32, + "learning_rate": 1.3586278819089837e-05, + "loss": 0.0344, + "step": 1363 + }, + { + "epoch": 3.32, + "learning_rate": 1.3550786900892634e-05, + "loss": 0.0316, + "step": 1364 + }, + { + "epoch": 3.32, + "learning_rate": 1.3515324158651981e-05, + "loss": 0.0356, + "step": 1365 + }, + { + "epoch": 3.32, + "learning_rate": 1.3479890682737379e-05, + "loss": 0.0307, + "step": 1366 + }, + { + "epoch": 3.33, + "learning_rate": 1.3444486563443723e-05, + "loss": 0.0291, + "step": 1367 + }, + { + "epoch": 3.33, + "learning_rate": 1.3409111890991105e-05, + "loss": 0.0316, + "step": 1368 + }, + { + "epoch": 3.33, + "learning_rate": 1.3373766755524564e-05, + "loss": 0.0318, + "step": 1369 + }, + { + "epoch": 3.33, + "learning_rate": 1.3338451247113897e-05, + "loss": 0.0308, + "step": 1370 + }, + { + "epoch": 3.34, + "learning_rate": 1.330316545575338e-05, + "loss": 0.0324, + "step": 1371 + }, + { + "epoch": 3.34, + "learning_rate": 1.3267909471361572e-05, + "loss": 0.0329, + "step": 1372 + }, + { + "epoch": 3.34, + "learning_rate": 1.323268338378108e-05, + "loss": 0.0317, + "step": 1373 + }, + { + "epoch": 3.34, + "learning_rate": 1.3197487282778315e-05, + "loss": 0.0336, + "step": 1374 + }, + { + "epoch": 3.35, + "learning_rate": 1.3162321258043261e-05, + "loss": 0.0318, + "step": 1375 + }, + { + "epoch": 3.35, + "learning_rate": 1.3127185399189295e-05, + "loss": 0.0363, + "step": 1376 + }, + { + "epoch": 3.35, + "learning_rate": 1.3092079795752887e-05, + "loss": 0.0336, + "step": 1377 + }, + { + "epoch": 3.35, + "learning_rate": 1.3057004537193423e-05, + "loss": 0.0299, + "step": 1378 + }, + { + "epoch": 3.36, + "learning_rate": 1.3021959712892979e-05, + "loss": 0.0321, + "step": 1379 + }, + { + "epoch": 3.36, + "learning_rate": 1.2986945412156038e-05, + "loss": 0.0309, + "step": 1380 + }, + { + "epoch": 3.36, + "learning_rate": 1.2951961724209317e-05, + "loss": 0.0324, + "step": 1381 + }, + { + "epoch": 3.36, + "learning_rate": 1.2917008738201537e-05, + "loss": 0.0296, + "step": 1382 + }, + { + "epoch": 3.37, + "learning_rate": 1.2882086543203154e-05, + "loss": 0.031, + "step": 1383 + }, + { + "epoch": 3.37, + "learning_rate": 1.284719522820618e-05, + "loss": 0.0281, + "step": 1384 + }, + { + "epoch": 3.37, + "learning_rate": 1.2812334882123944e-05, + "loss": 0.0302, + "step": 1385 + }, + { + "epoch": 3.37, + "learning_rate": 1.2777505593790834e-05, + "loss": 0.0317, + "step": 1386 + }, + { + "epoch": 3.38, + "learning_rate": 1.2742707451962088e-05, + "loss": 0.0308, + "step": 1387 + }, + { + "epoch": 3.38, + "learning_rate": 1.2707940545313613e-05, + "loss": 0.0312, + "step": 1388 + }, + { + "epoch": 3.38, + "learning_rate": 1.2673204962441671e-05, + "loss": 0.0294, + "step": 1389 + }, + { + "epoch": 3.38, + "learning_rate": 1.263850079186274e-05, + "loss": 0.0312, + "step": 1390 + }, + { + "epoch": 3.39, + "learning_rate": 1.2603828122013246e-05, + "loss": 0.0329, + "step": 1391 + }, + { + "epoch": 3.39, + "learning_rate": 1.2569187041249315e-05, + "loss": 0.0285, + "step": 1392 + }, + { + "epoch": 3.39, + "learning_rate": 1.253457763784659e-05, + "loss": 0.0304, + "step": 1393 + }, + { + "epoch": 3.39, + "learning_rate": 1.2500000000000006e-05, + "loss": 0.0295, + "step": 1394 + }, + { + "epoch": 3.4, + "learning_rate": 1.246545421582353e-05, + "loss": 0.0317, + "step": 1395 + }, + { + "epoch": 3.4, + "learning_rate": 1.2430940373349945e-05, + "loss": 0.0315, + "step": 1396 + }, + { + "epoch": 3.4, + "learning_rate": 1.2396458560530694e-05, + "loss": 0.0333, + "step": 1397 + }, + { + "epoch": 3.4, + "learning_rate": 1.2362008865235536e-05, + "loss": 0.0306, + "step": 1398 + }, + { + "epoch": 3.4, + "learning_rate": 1.2327591375252403e-05, + "loss": 0.0301, + "step": 1399 + }, + { + "epoch": 3.41, + "learning_rate": 1.2293206178287184e-05, + "loss": 0.0311, + "step": 1400 + }, + { + "epoch": 3.41, + "learning_rate": 1.2258853361963448e-05, + "loss": 0.0328, + "step": 1401 + }, + { + "epoch": 3.41, + "learning_rate": 1.2224533013822238e-05, + "loss": 0.0303, + "step": 1402 + }, + { + "epoch": 3.41, + "learning_rate": 1.2190245221321912e-05, + "loss": 0.0355, + "step": 1403 + }, + { + "epoch": 3.42, + "learning_rate": 1.2155990071837817e-05, + "loss": 0.0314, + "step": 1404 + }, + { + "epoch": 3.42, + "learning_rate": 1.2121767652662123e-05, + "loss": 0.0331, + "step": 1405 + }, + { + "epoch": 3.42, + "learning_rate": 1.2087578051003617e-05, + "loss": 0.0345, + "step": 1406 + }, + { + "epoch": 3.42, + "learning_rate": 1.2053421353987437e-05, + "loss": 0.0276, + "step": 1407 + }, + { + "epoch": 3.43, + "learning_rate": 1.2019297648654857e-05, + "loss": 0.0291, + "step": 1408 + }, + { + "epoch": 3.43, + "learning_rate": 1.198520702196313e-05, + "loss": 0.0306, + "step": 1409 + }, + { + "epoch": 3.43, + "learning_rate": 1.1951149560785167e-05, + "loss": 0.0319, + "step": 1410 + }, + { + "epoch": 3.43, + "learning_rate": 1.191712535190937e-05, + "loss": 0.0301, + "step": 1411 + }, + { + "epoch": 3.44, + "learning_rate": 1.188313448203943e-05, + "loss": 0.0246, + "step": 1412 + }, + { + "epoch": 3.44, + "learning_rate": 1.1849177037794051e-05, + "loss": 0.0307, + "step": 1413 + }, + { + "epoch": 3.44, + "learning_rate": 1.181525310570677e-05, + "loss": 0.0311, + "step": 1414 + }, + { + "epoch": 3.44, + "learning_rate": 1.178136277222573e-05, + "loss": 0.0298, + "step": 1415 + }, + { + "epoch": 3.45, + "learning_rate": 1.1747506123713458e-05, + "loss": 0.0332, + "step": 1416 + }, + { + "epoch": 3.45, + "learning_rate": 1.1713683246446622e-05, + "loss": 0.0277, + "step": 1417 + }, + { + "epoch": 3.45, + "learning_rate": 1.1679894226615862e-05, + "loss": 0.0343, + "step": 1418 + }, + { + "epoch": 3.45, + "learning_rate": 1.1646139150325507e-05, + "loss": 0.0324, + "step": 1419 + }, + { + "epoch": 3.46, + "learning_rate": 1.16124181035934e-05, + "loss": 0.0311, + "step": 1420 + }, + { + "epoch": 3.46, + "learning_rate": 1.157873117235067e-05, + "loss": 0.0322, + "step": 1421 + }, + { + "epoch": 3.46, + "learning_rate": 1.1545078442441526e-05, + "loss": 0.0311, + "step": 1422 + }, + { + "epoch": 3.46, + "learning_rate": 1.1511459999622981e-05, + "loss": 0.033, + "step": 1423 + }, + { + "epoch": 3.47, + "learning_rate": 1.147787592956472e-05, + "loss": 0.0335, + "step": 1424 + }, + { + "epoch": 3.47, + "learning_rate": 1.14443263178488e-05, + "loss": 0.0307, + "step": 1425 + }, + { + "epoch": 3.47, + "learning_rate": 1.1410811249969475e-05, + "loss": 0.0314, + "step": 1426 + }, + { + "epoch": 3.47, + "learning_rate": 1.1377330811332988e-05, + "loss": 0.0313, + "step": 1427 + }, + { + "epoch": 3.48, + "learning_rate": 1.1343885087257337e-05, + "loss": 0.03, + "step": 1428 + }, + { + "epoch": 3.48, + "learning_rate": 1.1310474162972026e-05, + "loss": 0.0284, + "step": 1429 + }, + { + "epoch": 3.48, + "learning_rate": 1.1277098123617922e-05, + "loss": 0.032, + "step": 1430 + }, + { + "epoch": 3.48, + "learning_rate": 1.124375705424696e-05, + "loss": 0.0383, + "step": 1431 + }, + { + "epoch": 3.49, + "learning_rate": 1.1210451039821965e-05, + "loss": 0.0352, + "step": 1432 + }, + { + "epoch": 3.49, + "learning_rate": 1.117718016521645e-05, + "loss": 0.0317, + "step": 1433 + }, + { + "epoch": 3.49, + "learning_rate": 1.1143944515214386e-05, + "loss": 0.0316, + "step": 1434 + }, + { + "epoch": 3.49, + "learning_rate": 1.1110744174509952e-05, + "loss": 0.0285, + "step": 1435 + }, + { + "epoch": 3.49, + "learning_rate": 1.1077579227707357e-05, + "loss": 0.0301, + "step": 1436 + }, + { + "epoch": 3.5, + "learning_rate": 1.104444975932064e-05, + "loss": 0.0307, + "step": 1437 + }, + { + "epoch": 3.5, + "learning_rate": 1.10113558537734e-05, + "loss": 0.0309, + "step": 1438 + }, + { + "epoch": 3.5, + "learning_rate": 1.0978297595398632e-05, + "loss": 0.0328, + "step": 1439 + }, + { + "epoch": 3.5, + "learning_rate": 1.094527506843849e-05, + "loss": 0.0277, + "step": 1440 + }, + { + "epoch": 3.51, + "learning_rate": 1.0912288357044062e-05, + "loss": 0.03, + "step": 1441 + }, + { + "epoch": 3.51, + "learning_rate": 1.0879337545275165e-05, + "loss": 0.0302, + "step": 1442 + }, + { + "epoch": 3.51, + "learning_rate": 1.084642271710016e-05, + "loss": 0.0309, + "step": 1443 + }, + { + "epoch": 3.51, + "learning_rate": 1.0813543956395675e-05, + "loss": 0.0292, + "step": 1444 + }, + { + "epoch": 3.52, + "learning_rate": 1.0780701346946453e-05, + "loss": 0.0297, + "step": 1445 + }, + { + "epoch": 3.52, + "learning_rate": 1.074789497244512e-05, + "loss": 0.0303, + "step": 1446 + }, + { + "epoch": 3.52, + "learning_rate": 1.0715124916491937e-05, + "loss": 0.0309, + "step": 1447 + }, + { + "epoch": 3.52, + "learning_rate": 1.0682391262594618e-05, + "loss": 0.0278, + "step": 1448 + }, + { + "epoch": 3.53, + "learning_rate": 1.0649694094168147e-05, + "loss": 0.0285, + "step": 1449 + }, + { + "epoch": 3.53, + "learning_rate": 1.0617033494534486e-05, + "loss": 0.0329, + "step": 1450 + }, + { + "epoch": 3.53, + "learning_rate": 1.0584409546922445e-05, + "loss": 0.0299, + "step": 1451 + }, + { + "epoch": 3.53, + "learning_rate": 1.0551822334467429e-05, + "loss": 0.0333, + "step": 1452 + }, + { + "epoch": 3.54, + "learning_rate": 1.0519271940211215e-05, + "loss": 0.0296, + "step": 1453 + }, + { + "epoch": 3.54, + "learning_rate": 1.0486758447101751e-05, + "loss": 0.0322, + "step": 1454 + }, + { + "epoch": 3.54, + "learning_rate": 1.0454281937992989e-05, + "loss": 0.031, + "step": 1455 + }, + { + "epoch": 3.54, + "learning_rate": 1.0421842495644587e-05, + "loss": 0.0293, + "step": 1456 + }, + { + "epoch": 3.55, + "learning_rate": 1.0389440202721778e-05, + "loss": 0.0341, + "step": 1457 + }, + { + "epoch": 3.55, + "learning_rate": 1.035707514179513e-05, + "loss": 0.0311, + "step": 1458 + }, + { + "epoch": 3.55, + "learning_rate": 1.0324747395340309e-05, + "loss": 0.0335, + "step": 1459 + }, + { + "epoch": 3.55, + "learning_rate": 1.0292457045737895e-05, + "loss": 0.0347, + "step": 1460 + }, + { + "epoch": 3.56, + "learning_rate": 1.02602041752732e-05, + "loss": 0.0323, + "step": 1461 + }, + { + "epoch": 3.56, + "learning_rate": 1.0227988866135996e-05, + "loss": 0.0282, + "step": 1462 + }, + { + "epoch": 3.56, + "learning_rate": 1.0195811200420333e-05, + "loss": 0.0285, + "step": 1463 + }, + { + "epoch": 3.56, + "learning_rate": 1.0163671260124385e-05, + "loss": 0.0313, + "step": 1464 + }, + { + "epoch": 3.57, + "learning_rate": 1.0131569127150142e-05, + "loss": 0.0255, + "step": 1465 + }, + { + "epoch": 3.57, + "learning_rate": 1.0099504883303254e-05, + "loss": 0.0303, + "step": 1466 + }, + { + "epoch": 3.57, + "learning_rate": 1.0067478610292847e-05, + "loss": 0.0282, + "step": 1467 + }, + { + "epoch": 3.57, + "learning_rate": 1.0035490389731255e-05, + "loss": 0.0268, + "step": 1468 + }, + { + "epoch": 3.58, + "learning_rate": 1.0003540303133843e-05, + "loss": 0.0289, + "step": 1469 + }, + { + "epoch": 3.58, + "learning_rate": 9.971628431918845e-06, + "loss": 0.0339, + "step": 1470 + }, + { + "epoch": 3.58, + "learning_rate": 9.939754857407063e-06, + "loss": 0.0288, + "step": 1471 + }, + { + "epoch": 3.58, + "learning_rate": 9.90791966082171e-06, + "loss": 0.0332, + "step": 1472 + }, + { + "epoch": 3.59, + "learning_rate": 9.876122923288239e-06, + "loss": 0.0286, + "step": 1473 + }, + { + "epoch": 3.59, + "learning_rate": 9.844364725834057e-06, + "loss": 0.0265, + "step": 1474 + }, + { + "epoch": 3.59, + "learning_rate": 9.812645149388363e-06, + "loss": 0.0325, + "step": 1475 + }, + { + "epoch": 3.59, + "learning_rate": 9.780964274781984e-06, + "loss": 0.0295, + "step": 1476 + }, + { + "epoch": 3.59, + "learning_rate": 9.749322182747072e-06, + "loss": 0.0308, + "step": 1477 + }, + { + "epoch": 3.6, + "learning_rate": 9.71771895391696e-06, + "loss": 0.0317, + "step": 1478 + }, + { + "epoch": 3.6, + "learning_rate": 9.686154668825973e-06, + "loss": 0.0275, + "step": 1479 + }, + { + "epoch": 3.6, + "learning_rate": 9.654629407909163e-06, + "loss": 0.0283, + "step": 1480 + }, + { + "epoch": 3.6, + "learning_rate": 9.623143251502148e-06, + "loss": 0.0287, + "step": 1481 + }, + { + "epoch": 3.61, + "learning_rate": 9.591696279840906e-06, + "loss": 0.0304, + "step": 1482 + }, + { + "epoch": 3.61, + "learning_rate": 9.560288573061563e-06, + "loss": 0.037, + "step": 1483 + }, + { + "epoch": 3.61, + "learning_rate": 9.52892021120016e-06, + "loss": 0.0308, + "step": 1484 + }, + { + "epoch": 3.61, + "learning_rate": 9.497591274192508e-06, + "loss": 0.0303, + "step": 1485 + }, + { + "epoch": 3.62, + "learning_rate": 9.46630184187393e-06, + "loss": 0.0295, + "step": 1486 + }, + { + "epoch": 3.62, + "learning_rate": 9.435051993979077e-06, + "loss": 0.0314, + "step": 1487 + }, + { + "epoch": 3.62, + "learning_rate": 9.403841810141747e-06, + "loss": 0.0323, + "step": 1488 + }, + { + "epoch": 3.62, + "learning_rate": 9.372671369894661e-06, + "loss": 0.0294, + "step": 1489 + }, + { + "epoch": 3.63, + "learning_rate": 9.341540752669235e-06, + "loss": 0.0316, + "step": 1490 + }, + { + "epoch": 3.63, + "learning_rate": 9.310450037795435e-06, + "loss": 0.0279, + "step": 1491 + }, + { + "epoch": 3.63, + "learning_rate": 9.279399304501526e-06, + "loss": 0.0302, + "step": 1492 + }, + { + "epoch": 3.63, + "learning_rate": 9.248388631913887e-06, + "loss": 0.0302, + "step": 1493 + }, + { + "epoch": 3.64, + "learning_rate": 9.21741809905682e-06, + "loss": 0.0298, + "step": 1494 + }, + { + "epoch": 3.64, + "learning_rate": 9.186487784852349e-06, + "loss": 0.029, + "step": 1495 + }, + { + "epoch": 3.64, + "learning_rate": 9.155597768119978e-06, + "loss": 0.0307, + "step": 1496 + }, + { + "epoch": 3.64, + "learning_rate": 9.124748127576552e-06, + "loss": 0.0299, + "step": 1497 + }, + { + "epoch": 3.65, + "learning_rate": 9.09393894183601e-06, + "loss": 0.0307, + "step": 1498 + }, + { + "epoch": 3.65, + "learning_rate": 9.063170289409192e-06, + "loss": 0.0293, + "step": 1499 + }, + { + "epoch": 3.65, + "learning_rate": 9.032442248703666e-06, + "loss": 0.0323, + "step": 1500 + }, + { + "epoch": 3.65, + "learning_rate": 9.001754898023512e-06, + "loss": 0.0289, + "step": 1501 + }, + { + "epoch": 3.66, + "learning_rate": 8.971108315569094e-06, + "loss": 0.0323, + "step": 1502 + }, + { + "epoch": 3.66, + "learning_rate": 8.940502579436913e-06, + "loss": 0.0276, + "step": 1503 + }, + { + "epoch": 3.66, + "learning_rate": 8.90993776761937e-06, + "loss": 0.0294, + "step": 1504 + }, + { + "epoch": 3.66, + "learning_rate": 8.879413958004566e-06, + "loss": 0.036, + "step": 1505 + }, + { + "epoch": 3.67, + "learning_rate": 8.848931228376136e-06, + "loss": 0.0305, + "step": 1506 + }, + { + "epoch": 3.67, + "learning_rate": 8.818489656413043e-06, + "loss": 0.0326, + "step": 1507 + }, + { + "epoch": 3.67, + "learning_rate": 8.788089319689324e-06, + "loss": 0.0323, + "step": 1508 + }, + { + "epoch": 3.67, + "learning_rate": 8.757730295673985e-06, + "loss": 0.0318, + "step": 1509 + }, + { + "epoch": 3.68, + "learning_rate": 8.727412661730724e-06, + "loss": 0.0285, + "step": 1510 + }, + { + "epoch": 3.68, + "learning_rate": 8.697136495117763e-06, + "loss": 0.0315, + "step": 1511 + }, + { + "epoch": 3.68, + "learning_rate": 8.666901872987676e-06, + "loss": 0.0302, + "step": 1512 + }, + { + "epoch": 3.68, + "learning_rate": 8.63670887238716e-06, + "loss": 0.0298, + "step": 1513 + }, + { + "epoch": 3.68, + "learning_rate": 8.606557570256843e-06, + "loss": 0.0332, + "step": 1514 + }, + { + "epoch": 3.69, + "learning_rate": 8.576448043431082e-06, + "loss": 0.0327, + "step": 1515 + }, + { + "epoch": 3.69, + "learning_rate": 8.546380368637812e-06, + "loss": 0.0314, + "step": 1516 + }, + { + "epoch": 3.69, + "learning_rate": 8.51635462249828e-06, + "loss": 0.0287, + "step": 1517 + }, + { + "epoch": 3.69, + "learning_rate": 8.486370881526917e-06, + "loss": 0.0282, + "step": 1518 + }, + { + "epoch": 3.7, + "learning_rate": 8.456429222131082e-06, + "loss": 0.0327, + "step": 1519 + }, + { + "epoch": 3.7, + "learning_rate": 8.426529720610934e-06, + "loss": 0.0285, + "step": 1520 + }, + { + "epoch": 3.7, + "learning_rate": 8.396672453159163e-06, + "loss": 0.0258, + "step": 1521 + }, + { + "epoch": 3.7, + "learning_rate": 8.36685749586087e-06, + "loss": 0.0292, + "step": 1522 + }, + { + "epoch": 3.71, + "learning_rate": 8.337084924693303e-06, + "loss": 0.0266, + "step": 1523 + }, + { + "epoch": 3.71, + "learning_rate": 8.307354815525731e-06, + "loss": 0.0299, + "step": 1524 + }, + { + "epoch": 3.71, + "learning_rate": 8.277667244119187e-06, + "loss": 0.0293, + "step": 1525 + }, + { + "epoch": 3.71, + "learning_rate": 8.24802228612633e-06, + "loss": 0.0298, + "step": 1526 + }, + { + "epoch": 3.72, + "learning_rate": 8.218420017091208e-06, + "loss": 0.0274, + "step": 1527 + }, + { + "epoch": 3.72, + "learning_rate": 8.188860512449107e-06, + "loss": 0.0281, + "step": 1528 + }, + { + "epoch": 3.72, + "learning_rate": 8.159343847526308e-06, + "loss": 0.026, + "step": 1529 + }, + { + "epoch": 3.72, + "learning_rate": 8.129870097539951e-06, + "loss": 0.0282, + "step": 1530 + }, + { + "epoch": 3.73, + "learning_rate": 8.100439337597798e-06, + "loss": 0.0328, + "step": 1531 + }, + { + "epoch": 3.73, + "learning_rate": 8.071051642698074e-06, + "loss": 0.0301, + "step": 1532 + }, + { + "epoch": 3.73, + "learning_rate": 8.041707087729244e-06, + "loss": 0.0283, + "step": 1533 + }, + { + "epoch": 3.73, + "learning_rate": 8.012405747469862e-06, + "loss": 0.032, + "step": 1534 + }, + { + "epoch": 3.74, + "learning_rate": 7.983147696588339e-06, + "loss": 0.0303, + "step": 1535 + }, + { + "epoch": 3.74, + "learning_rate": 7.953933009642773e-06, + "loss": 0.0317, + "step": 1536 + }, + { + "epoch": 3.74, + "learning_rate": 7.924761761080768e-06, + "loss": 0.029, + "step": 1537 + }, + { + "epoch": 3.74, + "learning_rate": 7.895634025239243e-06, + "loss": 0.034, + "step": 1538 + }, + { + "epoch": 3.75, + "learning_rate": 7.866549876344201e-06, + "loss": 0.0298, + "step": 1539 + }, + { + "epoch": 3.75, + "learning_rate": 7.837509388510611e-06, + "loss": 0.03, + "step": 1540 + }, + { + "epoch": 3.75, + "learning_rate": 7.808512635742157e-06, + "loss": 0.027, + "step": 1541 + }, + { + "epoch": 3.75, + "learning_rate": 7.779559691931066e-06, + "loss": 0.0287, + "step": 1542 + }, + { + "epoch": 3.76, + "learning_rate": 7.750650630857947e-06, + "loss": 0.0319, + "step": 1543 + }, + { + "epoch": 3.76, + "learning_rate": 7.721785526191588e-06, + "loss": 0.0297, + "step": 1544 + }, + { + "epoch": 3.76, + "learning_rate": 7.692964451488734e-06, + "loss": 0.0292, + "step": 1545 + }, + { + "epoch": 3.76, + "learning_rate": 7.66418748019396e-06, + "loss": 0.0264, + "step": 1546 + }, + { + "epoch": 3.77, + "learning_rate": 7.63545468563943e-06, + "loss": 0.0296, + "step": 1547 + }, + { + "epoch": 3.77, + "learning_rate": 7.606766141044733e-06, + "loss": 0.0279, + "step": 1548 + }, + { + "epoch": 3.77, + "learning_rate": 7.578121919516712e-06, + "loss": 0.0273, + "step": 1549 + }, + { + "epoch": 3.77, + "learning_rate": 7.54952209404926e-06, + "loss": 0.0302, + "step": 1550 + }, + { + "epoch": 3.77, + "learning_rate": 7.520966737523116e-06, + "loss": 0.024, + "step": 1551 + }, + { + "epoch": 3.78, + "learning_rate": 7.4924559227057265e-06, + "loss": 0.033, + "step": 1552 + }, + { + "epoch": 3.78, + "learning_rate": 7.463989722251014e-06, + "loss": 0.0283, + "step": 1553 + }, + { + "epoch": 3.78, + "learning_rate": 7.435568208699203e-06, + "loss": 0.0268, + "step": 1554 + }, + { + "epoch": 3.78, + "learning_rate": 7.407191454476667e-06, + "loss": 0.0264, + "step": 1555 + }, + { + "epoch": 3.79, + "learning_rate": 7.37885953189571e-06, + "loss": 0.0283, + "step": 1556 + }, + { + "epoch": 3.79, + "learning_rate": 7.350572513154377e-06, + "loss": 0.0288, + "step": 1557 + }, + { + "epoch": 3.79, + "learning_rate": 7.3223304703363135e-06, + "loss": 0.0291, + "step": 1558 + }, + { + "epoch": 3.79, + "learning_rate": 7.294133475410528e-06, + "loss": 0.0288, + "step": 1559 + }, + { + "epoch": 3.8, + "learning_rate": 7.265981600231234e-06, + "loss": 0.029, + "step": 1560 + }, + { + "epoch": 3.8, + "learning_rate": 7.23787491653769e-06, + "loss": 0.0321, + "step": 1561 + }, + { + "epoch": 3.8, + "learning_rate": 7.209813495953963e-06, + "loss": 0.0284, + "step": 1562 + }, + { + "epoch": 3.8, + "learning_rate": 7.181797409988802e-06, + "loss": 0.0291, + "step": 1563 + }, + { + "epoch": 3.81, + "learning_rate": 7.153826730035423e-06, + "loss": 0.0289, + "step": 1564 + }, + { + "epoch": 3.81, + "learning_rate": 7.125901527371329e-06, + "loss": 0.0286, + "step": 1565 + }, + { + "epoch": 3.81, + "learning_rate": 7.0980218731581255e-06, + "loss": 0.0292, + "step": 1566 + }, + { + "epoch": 3.81, + "learning_rate": 7.070187838441369e-06, + "loss": 0.0299, + "step": 1567 + }, + { + "epoch": 3.82, + "learning_rate": 7.042399494150342e-06, + "loss": 0.0293, + "step": 1568 + }, + { + "epoch": 3.82, + "learning_rate": 7.0146569110979086e-06, + "loss": 0.0291, + "step": 1569 + }, + { + "epoch": 3.82, + "learning_rate": 6.986960159980327e-06, + "loss": 0.0306, + "step": 1570 + }, + { + "epoch": 3.82, + "learning_rate": 6.959309311377038e-06, + "loss": 0.0302, + "step": 1571 + }, + { + "epoch": 3.83, + "learning_rate": 6.931704435750522e-06, + "loss": 0.0352, + "step": 1572 + }, + { + "epoch": 3.83, + "learning_rate": 6.904145603446116e-06, + "loss": 0.0274, + "step": 1573 + }, + { + "epoch": 3.83, + "learning_rate": 6.876632884691803e-06, + "loss": 0.0321, + "step": 1574 + }, + { + "epoch": 3.83, + "learning_rate": 6.849166349598079e-06, + "loss": 0.0282, + "step": 1575 + }, + { + "epoch": 3.84, + "learning_rate": 6.821746068157741e-06, + "loss": 0.0289, + "step": 1576 + }, + { + "epoch": 3.84, + "learning_rate": 6.794372110245717e-06, + "loss": 0.029, + "step": 1577 + }, + { + "epoch": 3.84, + "learning_rate": 6.767044545618878e-06, + "loss": 0.0286, + "step": 1578 + }, + { + "epoch": 3.84, + "learning_rate": 6.739763443915895e-06, + "loss": 0.0288, + "step": 1579 + }, + { + "epoch": 3.85, + "learning_rate": 6.712528874657012e-06, + "loss": 0.0274, + "step": 1580 + }, + { + "epoch": 3.85, + "learning_rate": 6.685340907243915e-06, + "loss": 0.0312, + "step": 1581 + }, + { + "epoch": 3.85, + "learning_rate": 6.658199610959537e-06, + "loss": 0.032, + "step": 1582 + }, + { + "epoch": 3.85, + "learning_rate": 6.6311050549678595e-06, + "loss": 0.0284, + "step": 1583 + }, + { + "epoch": 3.86, + "learning_rate": 6.604057308313763e-06, + "loss": 0.0303, + "step": 1584 + }, + { + "epoch": 3.86, + "learning_rate": 6.577056439922857e-06, + "loss": 0.0278, + "step": 1585 + }, + { + "epoch": 3.86, + "learning_rate": 6.55010251860127e-06, + "loss": 0.0302, + "step": 1586 + }, + { + "epoch": 3.86, + "learning_rate": 6.523195613035521e-06, + "loss": 0.0285, + "step": 1587 + }, + { + "epoch": 3.86, + "learning_rate": 6.496335791792293e-06, + "loss": 0.0279, + "step": 1588 + }, + { + "epoch": 3.87, + "learning_rate": 6.469523123318308e-06, + "loss": 0.0302, + "step": 1589 + }, + { + "epoch": 3.87, + "learning_rate": 6.442757675940109e-06, + "loss": 0.0319, + "step": 1590 + }, + { + "epoch": 3.87, + "learning_rate": 6.4160395178639196e-06, + "loss": 0.0297, + "step": 1591 + }, + { + "epoch": 3.87, + "learning_rate": 6.389368717175448e-06, + "loss": 0.0272, + "step": 1592 + }, + { + "epoch": 3.88, + "learning_rate": 6.362745341839729e-06, + "loss": 0.0292, + "step": 1593 + }, + { + "epoch": 3.88, + "learning_rate": 6.336169459700933e-06, + "loss": 0.027, + "step": 1594 + }, + { + "epoch": 3.88, + "learning_rate": 6.309641138482222e-06, + "loss": 0.0302, + "step": 1595 + }, + { + "epoch": 3.88, + "learning_rate": 6.283160445785532e-06, + "loss": 0.0258, + "step": 1596 + }, + { + "epoch": 3.89, + "learning_rate": 6.25672744909146e-06, + "loss": 0.028, + "step": 1597 + }, + { + "epoch": 3.89, + "learning_rate": 6.230342215759028e-06, + "loss": 0.0255, + "step": 1598 + }, + { + "epoch": 3.89, + "learning_rate": 6.204004813025568e-06, + "loss": 0.0309, + "step": 1599 + }, + { + "epoch": 3.89, + "learning_rate": 6.177715308006505e-06, + "loss": 0.0329, + "step": 1600 + }, + { + "epoch": 3.9, + "learning_rate": 6.151473767695229e-06, + "loss": 0.0294, + "step": 1601 + }, + { + "epoch": 3.9, + "learning_rate": 6.125280258962873e-06, + "loss": 0.0273, + "step": 1602 + }, + { + "epoch": 3.9, + "learning_rate": 6.099134848558208e-06, + "loss": 0.0266, + "step": 1603 + }, + { + "epoch": 3.9, + "learning_rate": 6.073037603107404e-06, + "loss": 0.0281, + "step": 1604 + }, + { + "epoch": 3.91, + "learning_rate": 6.0469885891139e-06, + "loss": 0.0286, + "step": 1605 + }, + { + "epoch": 3.91, + "learning_rate": 6.020987872958236e-06, + "loss": 0.0251, + "step": 1606 + }, + { + "epoch": 3.91, + "learning_rate": 5.995035520897882e-06, + "loss": 0.0266, + "step": 1607 + }, + { + "epoch": 3.91, + "learning_rate": 5.969131599067044e-06, + "loss": 0.0288, + "step": 1608 + }, + { + "epoch": 3.92, + "learning_rate": 5.943276173476509e-06, + "loss": 0.0282, + "step": 1609 + }, + { + "epoch": 3.92, + "learning_rate": 5.91746931001351e-06, + "loss": 0.0272, + "step": 1610 + }, + { + "epoch": 3.92, + "learning_rate": 5.891711074441495e-06, + "loss": 0.0275, + "step": 1611 + }, + { + "epoch": 3.92, + "learning_rate": 5.866001532400023e-06, + "loss": 0.0284, + "step": 1612 + }, + { + "epoch": 3.93, + "learning_rate": 5.84034074940456e-06, + "loss": 0.029, + "step": 1613 + }, + { + "epoch": 3.93, + "learning_rate": 5.814728790846308e-06, + "loss": 0.0275, + "step": 1614 + }, + { + "epoch": 3.93, + "learning_rate": 5.789165721992052e-06, + "loss": 0.0286, + "step": 1615 + }, + { + "epoch": 3.93, + "learning_rate": 5.763651607984008e-06, + "loss": 0.0252, + "step": 1616 + }, + { + "epoch": 3.94, + "learning_rate": 5.738186513839619e-06, + "loss": 0.0278, + "step": 1617 + }, + { + "epoch": 3.94, + "learning_rate": 5.712770504451426e-06, + "loss": 0.0271, + "step": 1618 + }, + { + "epoch": 3.94, + "learning_rate": 5.687403644586891e-06, + "loss": 0.0298, + "step": 1619 + }, + { + "epoch": 3.94, + "learning_rate": 5.662085998888214e-06, + "loss": 0.0299, + "step": 1620 + }, + { + "epoch": 3.95, + "learning_rate": 5.636817631872185e-06, + "loss": 0.0254, + "step": 1621 + }, + { + "epoch": 3.95, + "learning_rate": 5.611598607930032e-06, + "loss": 0.027, + "step": 1622 + }, + { + "epoch": 3.95, + "learning_rate": 5.586428991327223e-06, + "loss": 0.0297, + "step": 1623 + }, + { + "epoch": 3.95, + "learning_rate": 5.561308846203333e-06, + "loss": 0.0311, + "step": 1624 + }, + { + "epoch": 3.95, + "learning_rate": 5.5362382365718775e-06, + "loss": 0.0297, + "step": 1625 + }, + { + "epoch": 3.96, + "learning_rate": 5.511217226320125e-06, + "loss": 0.0296, + "step": 1626 + }, + { + "epoch": 3.96, + "learning_rate": 5.486245879208945e-06, + "loss": 0.0279, + "step": 1627 + }, + { + "epoch": 3.96, + "learning_rate": 5.46132425887268e-06, + "loss": 0.0267, + "step": 1628 + }, + { + "epoch": 3.96, + "learning_rate": 5.436452428818919e-06, + "loss": 0.0269, + "step": 1629 + }, + { + "epoch": 3.97, + "learning_rate": 5.411630452428395e-06, + "loss": 0.0305, + "step": 1630 + }, + { + "epoch": 3.97, + "learning_rate": 5.386858392954799e-06, + "loss": 0.026, + "step": 1631 + }, + { + "epoch": 3.97, + "learning_rate": 5.362136313524607e-06, + "loss": 0.0271, + "step": 1632 + }, + { + "epoch": 3.97, + "learning_rate": 5.337464277136925e-06, + "loss": 0.0284, + "step": 1633 + }, + { + "epoch": 3.98, + "learning_rate": 5.3128423466633634e-06, + "loss": 0.0275, + "step": 1634 + }, + { + "epoch": 3.98, + "learning_rate": 5.288270584847813e-06, + "loss": 0.0271, + "step": 1635 + }, + { + "epoch": 3.98, + "learning_rate": 5.263749054306347e-06, + "loss": 0.028, + "step": 1636 + }, + { + "epoch": 3.98, + "learning_rate": 5.23927781752703e-06, + "loss": 0.0257, + "step": 1637 + }, + { + "epoch": 3.99, + "learning_rate": 5.214856936869752e-06, + "loss": 0.0269, + "step": 1638 + }, + { + "epoch": 3.99, + "learning_rate": 5.1904864745660835e-06, + "loss": 0.0233, + "step": 1639 + }, + { + "epoch": 3.99, + "learning_rate": 5.166166492719124e-06, + "loss": 0.0302, + "step": 1640 + }, + { + "epoch": 3.99, + "learning_rate": 5.141897053303327e-06, + "loss": 0.0278, + "step": 1641 + }, + { + "epoch": 4.0, + "learning_rate": 5.117678218164338e-06, + "loss": 0.0327, + "step": 1642 + }, + { + "epoch": 4.0, + "learning_rate": 5.0935100490188795e-06, + "loss": 0.0293, + "step": 1643 + }, + { + "epoch": 4.0, + "eval_loss": 0.9072719812393188, + "eval_runtime": 116.1375, + "eval_samples_per_second": 6.561, + "eval_steps_per_second": 0.413, + "step": 1643 + }, + { + "epoch": 4.0, + "learning_rate": 5.0693926074545315e-06, + "loss": 0.0203, + "step": 1644 + }, + { + "epoch": 4.0, + "learning_rate": 5.045325954929614e-06, + "loss": 0.0074, + "step": 1645 + }, + { + "epoch": 4.01, + "learning_rate": 5.0213101527730345e-06, + "loss": 0.0079, + "step": 1646 + }, + { + "epoch": 4.01, + "learning_rate": 4.9973452621841e-06, + "loss": 0.0079, + "step": 1647 + }, + { + "epoch": 4.01, + "learning_rate": 4.973431344232377e-06, + "loss": 0.0085, + "step": 1648 + }, + { + "epoch": 4.01, + "learning_rate": 4.9495684598575735e-06, + "loss": 0.0075, + "step": 1649 + }, + { + "epoch": 4.02, + "learning_rate": 4.925756669869314e-06, + "loss": 0.0068, + "step": 1650 + }, + { + "epoch": 4.02, + "learning_rate": 4.9019960349470265e-06, + "loss": 0.0074, + "step": 1651 + }, + { + "epoch": 4.02, + "learning_rate": 4.878286615639791e-06, + "loss": 0.0053, + "step": 1652 + }, + { + "epoch": 4.02, + "learning_rate": 4.8546284723661715e-06, + "loss": 0.0074, + "step": 1653 + }, + { + "epoch": 4.03, + "learning_rate": 4.8310216654140425e-06, + "loss": 0.005, + "step": 1654 + }, + { + "epoch": 4.03, + "learning_rate": 4.80746625494051e-06, + "loss": 0.006, + "step": 1655 + }, + { + "epoch": 4.03, + "learning_rate": 4.7839623009716615e-06, + "loss": 0.0054, + "step": 1656 + }, + { + "epoch": 4.03, + "learning_rate": 4.760509863402468e-06, + "loss": 0.0068, + "step": 1657 + }, + { + "epoch": 4.04, + "learning_rate": 4.737109001996637e-06, + "loss": 0.0047, + "step": 1658 + }, + { + "epoch": 4.04, + "learning_rate": 4.7137597763864286e-06, + "loss": 0.0056, + "step": 1659 + }, + { + "epoch": 4.04, + "learning_rate": 4.690462246072516e-06, + "loss": 0.0059, + "step": 1660 + }, + { + "epoch": 4.04, + "learning_rate": 4.667216470423858e-06, + "loss": 0.0051, + "step": 1661 + }, + { + "epoch": 4.05, + "learning_rate": 4.644022508677518e-06, + "loss": 0.0063, + "step": 1662 + }, + { + "epoch": 4.05, + "learning_rate": 4.620880419938511e-06, + "loss": 0.0059, + "step": 1663 + }, + { + "epoch": 4.05, + "learning_rate": 4.5977902631796855e-06, + "loss": 0.0067, + "step": 1664 + }, + { + "epoch": 4.05, + "learning_rate": 4.574752097241533e-06, + "loss": 0.005, + "step": 1665 + }, + { + "epoch": 4.05, + "learning_rate": 4.551765980832059e-06, + "loss": 0.0048, + "step": 1666 + }, + { + "epoch": 4.06, + "learning_rate": 4.528831972526645e-06, + "loss": 0.0066, + "step": 1667 + }, + { + "epoch": 4.06, + "learning_rate": 4.505950130767883e-06, + "loss": 0.0045, + "step": 1668 + }, + { + "epoch": 4.06, + "learning_rate": 4.483120513865411e-06, + "loss": 0.0046, + "step": 1669 + }, + { + "epoch": 4.06, + "learning_rate": 4.460343179995807e-06, + "loss": 0.006, + "step": 1670 + }, + { + "epoch": 4.07, + "learning_rate": 4.4376181872024e-06, + "loss": 0.0047, + "step": 1671 + }, + { + "epoch": 4.07, + "learning_rate": 4.4149455933951396e-06, + "loss": 0.0059, + "step": 1672 + }, + { + "epoch": 4.07, + "learning_rate": 4.392325456350454e-06, + "loss": 0.0052, + "step": 1673 + }, + { + "epoch": 4.07, + "learning_rate": 4.369757833711105e-06, + "loss": 0.0062, + "step": 1674 + }, + { + "epoch": 4.08, + "learning_rate": 4.347242782986008e-06, + "loss": 0.004, + "step": 1675 + }, + { + "epoch": 4.08, + "learning_rate": 4.324780361550129e-06, + "loss": 0.006, + "step": 1676 + }, + { + "epoch": 4.08, + "learning_rate": 4.302370626644314e-06, + "loss": 0.0052, + "step": 1677 + }, + { + "epoch": 4.08, + "learning_rate": 4.280013635375138e-06, + "loss": 0.0048, + "step": 1678 + }, + { + "epoch": 4.09, + "learning_rate": 4.2577094447147856e-06, + "loss": 0.0047, + "step": 1679 + }, + { + "epoch": 4.09, + "learning_rate": 4.235458111500889e-06, + "loss": 0.0046, + "step": 1680 + }, + { + "epoch": 4.09, + "learning_rate": 4.213259692436367e-06, + "loss": 0.0047, + "step": 1681 + }, + { + "epoch": 4.09, + "learning_rate": 4.19111424408932e-06, + "loss": 0.0048, + "step": 1682 + }, + { + "epoch": 4.1, + "learning_rate": 4.169021822892849e-06, + "loss": 0.0045, + "step": 1683 + }, + { + "epoch": 4.1, + "learning_rate": 4.146982485144921e-06, + "loss": 0.0083, + "step": 1684 + }, + { + "epoch": 4.1, + "learning_rate": 4.124996287008245e-06, + "loss": 0.0056, + "step": 1685 + }, + { + "epoch": 4.1, + "learning_rate": 4.103063284510117e-06, + "loss": 0.0061, + "step": 1686 + }, + { + "epoch": 4.11, + "learning_rate": 4.081183533542262e-06, + "loss": 0.0058, + "step": 1687 + }, + { + "epoch": 4.11, + "learning_rate": 4.059357089860702e-06, + "loss": 0.0057, + "step": 1688 + }, + { + "epoch": 4.11, + "learning_rate": 4.037584009085635e-06, + "loss": 0.005, + "step": 1689 + }, + { + "epoch": 4.11, + "learning_rate": 4.015864346701251e-06, + "loss": 0.0049, + "step": 1690 + }, + { + "epoch": 4.12, + "learning_rate": 3.994198158055637e-06, + "loss": 0.0048, + "step": 1691 + }, + { + "epoch": 4.12, + "learning_rate": 3.972585498360606e-06, + "loss": 0.0067, + "step": 1692 + }, + { + "epoch": 4.12, + "learning_rate": 3.951026422691556e-06, + "loss": 0.0054, + "step": 1693 + }, + { + "epoch": 4.12, + "learning_rate": 3.929520985987334e-06, + "loss": 0.0043, + "step": 1694 + }, + { + "epoch": 4.13, + "learning_rate": 3.908069243050122e-06, + "loss": 0.0051, + "step": 1695 + }, + { + "epoch": 4.13, + "learning_rate": 3.886671248545243e-06, + "loss": 0.0045, + "step": 1696 + }, + { + "epoch": 4.13, + "learning_rate": 3.865327057001078e-06, + "loss": 0.0054, + "step": 1697 + }, + { + "epoch": 4.13, + "learning_rate": 3.8440367228088995e-06, + "loss": 0.0051, + "step": 1698 + }, + { + "epoch": 4.14, + "learning_rate": 3.8228003002227255e-06, + "loss": 0.0048, + "step": 1699 + }, + { + "epoch": 4.14, + "learning_rate": 3.801617843359187e-06, + "loss": 0.0048, + "step": 1700 + }, + { + "epoch": 4.14, + "learning_rate": 3.7804894061974183e-06, + "loss": 0.0059, + "step": 1701 + }, + { + "epoch": 4.14, + "learning_rate": 3.7594150425788675e-06, + "loss": 0.0057, + "step": 1702 + }, + { + "epoch": 4.14, + "learning_rate": 3.738394806207207e-06, + "loss": 0.0057, + "step": 1703 + }, + { + "epoch": 4.15, + "learning_rate": 3.7174287506481776e-06, + "loss": 0.0046, + "step": 1704 + }, + { + "epoch": 4.15, + "learning_rate": 3.6965169293294357e-06, + "loss": 0.0039, + "step": 1705 + }, + { + "epoch": 4.15, + "learning_rate": 3.67565939554044e-06, + "loss": 0.0045, + "step": 1706 + }, + { + "epoch": 4.15, + "learning_rate": 3.654856202432319e-06, + "loss": 0.0069, + "step": 1707 + }, + { + "epoch": 4.16, + "learning_rate": 3.6341074030177114e-06, + "loss": 0.0053, + "step": 1708 + }, + { + "epoch": 4.16, + "learning_rate": 3.6134130501706417e-06, + "loss": 0.0061, + "step": 1709 + }, + { + "epoch": 4.16, + "learning_rate": 3.592773196626417e-06, + "loss": 0.0049, + "step": 1710 + }, + { + "epoch": 4.16, + "learning_rate": 3.5721878949814323e-06, + "loss": 0.0051, + "step": 1711 + }, + { + "epoch": 4.17, + "learning_rate": 3.5516571976930786e-06, + "loss": 0.0053, + "step": 1712 + }, + { + "epoch": 4.17, + "learning_rate": 3.531181157079605e-06, + "loss": 0.0045, + "step": 1713 + }, + { + "epoch": 4.17, + "learning_rate": 3.5107598253199758e-06, + "loss": 0.0048, + "step": 1714 + }, + { + "epoch": 4.17, + "learning_rate": 3.4903932544537276e-06, + "loss": 0.0044, + "step": 1715 + }, + { + "epoch": 4.18, + "learning_rate": 3.470081496380881e-06, + "loss": 0.0047, + "step": 1716 + }, + { + "epoch": 4.18, + "learning_rate": 3.4498246028617536e-06, + "loss": 0.0041, + "step": 1717 + }, + { + "epoch": 4.18, + "learning_rate": 3.4296226255168485e-06, + "loss": 0.0053, + "step": 1718 + }, + { + "epoch": 4.18, + "learning_rate": 3.409475615826746e-06, + "loss": 0.0057, + "step": 1719 + }, + { + "epoch": 4.19, + "learning_rate": 3.3893836251319422e-06, + "loss": 0.0044, + "step": 1720 + }, + { + "epoch": 4.19, + "learning_rate": 3.3693467046327117e-06, + "loss": 0.005, + "step": 1721 + }, + { + "epoch": 4.19, + "learning_rate": 3.3493649053890326e-06, + "loss": 0.0045, + "step": 1722 + }, + { + "epoch": 4.19, + "learning_rate": 3.32943827832039e-06, + "loss": 0.0049, + "step": 1723 + }, + { + "epoch": 4.2, + "learning_rate": 3.309566874205672e-06, + "loss": 0.0052, + "step": 1724 + }, + { + "epoch": 4.2, + "learning_rate": 3.289750743683062e-06, + "loss": 0.0046, + "step": 1725 + }, + { + "epoch": 4.2, + "learning_rate": 3.2699899372498733e-06, + "loss": 0.0048, + "step": 1726 + }, + { + "epoch": 4.2, + "learning_rate": 3.2502845052624354e-06, + "loss": 0.0055, + "step": 1727 + }, + { + "epoch": 4.21, + "learning_rate": 3.230634497935983e-06, + "loss": 0.0042, + "step": 1728 + }, + { + "epoch": 4.21, + "learning_rate": 3.211039965344512e-06, + "loss": 0.0037, + "step": 1729 + }, + { + "epoch": 4.21, + "learning_rate": 3.1915009574206262e-06, + "loss": 0.0051, + "step": 1730 + }, + { + "epoch": 4.21, + "learning_rate": 3.17201752395547e-06, + "loss": 0.0051, + "step": 1731 + }, + { + "epoch": 4.22, + "learning_rate": 3.1525897145985472e-06, + "loss": 0.0041, + "step": 1732 + }, + { + "epoch": 4.22, + "learning_rate": 3.133217578857611e-06, + "loss": 0.0048, + "step": 1733 + }, + { + "epoch": 4.22, + "learning_rate": 3.113901166098562e-06, + "loss": 0.0053, + "step": 1734 + }, + { + "epoch": 4.22, + "learning_rate": 3.0946405255452947e-06, + "loss": 0.0044, + "step": 1735 + }, + { + "epoch": 4.23, + "learning_rate": 3.075435706279567e-06, + "loss": 0.0044, + "step": 1736 + }, + { + "epoch": 4.23, + "learning_rate": 3.0562867572409034e-06, + "loss": 0.0051, + "step": 1737 + }, + { + "epoch": 4.23, + "learning_rate": 3.037193727226445e-06, + "loss": 0.0046, + "step": 1738 + }, + { + "epoch": 4.23, + "learning_rate": 3.018156664890834e-06, + "loss": 0.0039, + "step": 1739 + }, + { + "epoch": 4.23, + "learning_rate": 2.9991756187461e-06, + "loss": 0.0054, + "step": 1740 + }, + { + "epoch": 4.24, + "learning_rate": 2.9802506371615246e-06, + "loss": 0.0043, + "step": 1741 + }, + { + "epoch": 4.24, + "learning_rate": 2.961381768363511e-06, + "loss": 0.0049, + "step": 1742 + }, + { + "epoch": 4.24, + "learning_rate": 2.942569060435482e-06, + "loss": 0.0047, + "step": 1743 + }, + { + "epoch": 4.24, + "learning_rate": 2.9238125613177403e-06, + "loss": 0.0047, + "step": 1744 + }, + { + "epoch": 4.25, + "learning_rate": 2.905112318807346e-06, + "loss": 0.0047, + "step": 1745 + }, + { + "epoch": 4.25, + "learning_rate": 2.8864683805580133e-06, + "loss": 0.0054, + "step": 1746 + }, + { + "epoch": 4.25, + "learning_rate": 2.8678807940799744e-06, + "loss": 0.0037, + "step": 1747 + }, + { + "epoch": 4.25, + "learning_rate": 2.8493496067398483e-06, + "loss": 0.0062, + "step": 1748 + }, + { + "epoch": 4.26, + "learning_rate": 2.8308748657605522e-06, + "loss": 0.0042, + "step": 1749 + }, + { + "epoch": 4.26, + "learning_rate": 2.812456618221143e-06, + "loss": 0.006, + "step": 1750 + }, + { + "epoch": 4.26, + "learning_rate": 2.794094911056719e-06, + "loss": 0.0043, + "step": 1751 + }, + { + "epoch": 4.26, + "learning_rate": 2.775789791058306e-06, + "loss": 0.0049, + "step": 1752 + }, + { + "epoch": 4.27, + "learning_rate": 2.757541304872732e-06, + "loss": 0.0052, + "step": 1753 + }, + { + "epoch": 4.27, + "learning_rate": 2.7393494990024834e-06, + "loss": 0.0052, + "step": 1754 + }, + { + "epoch": 4.27, + "learning_rate": 2.7212144198056374e-06, + "loss": 0.0065, + "step": 1755 + }, + { + "epoch": 4.27, + "learning_rate": 2.7031361134956913e-06, + "loss": 0.0049, + "step": 1756 + }, + { + "epoch": 4.28, + "learning_rate": 2.6851146261414747e-06, + "loss": 0.0063, + "step": 1757 + }, + { + "epoch": 4.28, + "learning_rate": 2.667150003667032e-06, + "loss": 0.0043, + "step": 1758 + }, + { + "epoch": 4.28, + "learning_rate": 2.649242291851503e-06, + "loss": 0.0048, + "step": 1759 + }, + { + "epoch": 4.28, + "learning_rate": 2.631391536328992e-06, + "loss": 0.005, + "step": 1760 + }, + { + "epoch": 4.29, + "learning_rate": 2.6135977825884533e-06, + "loss": 0.0066, + "step": 1761 + }, + { + "epoch": 4.29, + "learning_rate": 2.595861075973613e-06, + "loss": 0.0053, + "step": 1762 + }, + { + "epoch": 4.29, + "learning_rate": 2.578181461682794e-06, + "loss": 0.0031, + "step": 1763 + }, + { + "epoch": 4.29, + "learning_rate": 2.5605589847688518e-06, + "loss": 0.0028, + "step": 1764 + }, + { + "epoch": 4.3, + "learning_rate": 2.5429936901390284e-06, + "loss": 0.004, + "step": 1765 + }, + { + "epoch": 4.3, + "learning_rate": 2.5254856225548544e-06, + "loss": 0.0034, + "step": 1766 + }, + { + "epoch": 4.3, + "learning_rate": 2.508034826632022e-06, + "loss": 0.0068, + "step": 1767 + }, + { + "epoch": 4.3, + "learning_rate": 2.4906413468402916e-06, + "loss": 0.0049, + "step": 1768 + }, + { + "epoch": 4.31, + "learning_rate": 2.4733052275033448e-06, + "loss": 0.0042, + "step": 1769 + }, + { + "epoch": 4.31, + "learning_rate": 2.4560265127987147e-06, + "loss": 0.0048, + "step": 1770 + }, + { + "epoch": 4.31, + "learning_rate": 2.4388052467576308e-06, + "loss": 0.0053, + "step": 1771 + }, + { + "epoch": 4.31, + "learning_rate": 2.4216414732649432e-06, + "loss": 0.0047, + "step": 1772 + }, + { + "epoch": 4.32, + "learning_rate": 2.40453523605898e-06, + "loss": 0.0046, + "step": 1773 + }, + { + "epoch": 4.32, + "learning_rate": 2.3874865787314598e-06, + "loss": 0.0043, + "step": 1774 + }, + { + "epoch": 4.32, + "learning_rate": 2.3704955447273636e-06, + "loss": 0.0053, + "step": 1775 + }, + { + "epoch": 4.32, + "learning_rate": 2.3535621773448395e-06, + "loss": 0.005, + "step": 1776 + }, + { + "epoch": 4.32, + "learning_rate": 2.3366865197350733e-06, + "loss": 0.0045, + "step": 1777 + }, + { + "epoch": 4.33, + "learning_rate": 2.3198686149022013e-06, + "loss": 0.006, + "step": 1778 + }, + { + "epoch": 4.33, + "learning_rate": 2.303108505703178e-06, + "loss": 0.0047, + "step": 1779 + }, + { + "epoch": 4.33, + "learning_rate": 2.2864062348476905e-06, + "loss": 0.0038, + "step": 1780 + }, + { + "epoch": 4.33, + "learning_rate": 2.2697618448980217e-06, + "loss": 0.0054, + "step": 1781 + }, + { + "epoch": 4.34, + "learning_rate": 2.2531753782689598e-06, + "loss": 0.0052, + "step": 1782 + }, + { + "epoch": 4.34, + "learning_rate": 2.2366468772276994e-06, + "loss": 0.0038, + "step": 1783 + }, + { + "epoch": 4.34, + "learning_rate": 2.2201763838937184e-06, + "loss": 0.0043, + "step": 1784 + }, + { + "epoch": 4.34, + "learning_rate": 2.2037639402386566e-06, + "loss": 0.0038, + "step": 1785 + }, + { + "epoch": 4.35, + "learning_rate": 2.1874095880862505e-06, + "loss": 0.0052, + "step": 1786 + }, + { + "epoch": 4.35, + "learning_rate": 2.1711133691121903e-06, + "loss": 0.0052, + "step": 1787 + }, + { + "epoch": 4.35, + "learning_rate": 2.1548753248440164e-06, + "loss": 0.0055, + "step": 1788 + }, + { + "epoch": 4.35, + "learning_rate": 2.138695496661039e-06, + "loss": 0.0044, + "step": 1789 + }, + { + "epoch": 4.36, + "learning_rate": 2.122573925794219e-06, + "loss": 0.0037, + "step": 1790 + }, + { + "epoch": 4.36, + "learning_rate": 2.1065106533260383e-06, + "loss": 0.0063, + "step": 1791 + }, + { + "epoch": 4.36, + "learning_rate": 2.0905057201904445e-06, + "loss": 0.0035, + "step": 1792 + }, + { + "epoch": 4.36, + "learning_rate": 2.0745591671727018e-06, + "loss": 0.0048, + "step": 1793 + }, + { + "epoch": 4.37, + "learning_rate": 2.0586710349093013e-06, + "loss": 0.0057, + "step": 1794 + }, + { + "epoch": 4.37, + "learning_rate": 2.0428413638878764e-06, + "loss": 0.0055, + "step": 1795 + }, + { + "epoch": 4.37, + "learning_rate": 2.027070194447081e-06, + "loss": 0.0052, + "step": 1796 + }, + { + "epoch": 4.37, + "learning_rate": 2.0113575667764755e-06, + "loss": 0.0036, + "step": 1797 + }, + { + "epoch": 4.38, + "learning_rate": 1.995703520916456e-06, + "loss": 0.0041, + "step": 1798 + }, + { + "epoch": 4.38, + "learning_rate": 1.9801080967581263e-06, + "loss": 0.0054, + "step": 1799 + }, + { + "epoch": 4.38, + "learning_rate": 1.9645713340431997e-06, + "loss": 0.0044, + "step": 1800 + }, + { + "epoch": 4.38, + "learning_rate": 1.9490932723639165e-06, + "loss": 0.0047, + "step": 1801 + }, + { + "epoch": 4.39, + "learning_rate": 1.9336739511629233e-06, + "loss": 0.0049, + "step": 1802 + }, + { + "epoch": 4.39, + "learning_rate": 1.918313409733169e-06, + "loss": 0.0041, + "step": 1803 + }, + { + "epoch": 4.39, + "learning_rate": 1.9030116872178316e-06, + "loss": 0.0044, + "step": 1804 + }, + { + "epoch": 4.39, + "learning_rate": 1.8877688226101919e-06, + "loss": 0.0059, + "step": 1805 + }, + { + "epoch": 4.4, + "learning_rate": 1.8725848547535368e-06, + "loss": 0.0041, + "step": 1806 + }, + { + "epoch": 4.4, + "learning_rate": 1.8574598223410872e-06, + "loss": 0.0061, + "step": 1807 + }, + { + "epoch": 4.4, + "learning_rate": 1.8423937639158534e-06, + "loss": 0.0046, + "step": 1808 + }, + { + "epoch": 4.4, + "learning_rate": 1.82738671787058e-06, + "loss": 0.0043, + "step": 1809 + }, + { + "epoch": 4.41, + "learning_rate": 1.8124387224476347e-06, + "loss": 0.0052, + "step": 1810 + }, + { + "epoch": 4.41, + "learning_rate": 1.7975498157388915e-06, + "loss": 0.0051, + "step": 1811 + }, + { + "epoch": 4.41, + "learning_rate": 1.7827200356856533e-06, + "loss": 0.0046, + "step": 1812 + }, + { + "epoch": 4.41, + "learning_rate": 1.7679494200785601e-06, + "loss": 0.0069, + "step": 1813 + }, + { + "epoch": 4.41, + "learning_rate": 1.7532380065574726e-06, + "loss": 0.005, + "step": 1814 + }, + { + "epoch": 4.42, + "learning_rate": 1.7385858326113918e-06, + "loss": 0.004, + "step": 1815 + }, + { + "epoch": 4.42, + "learning_rate": 1.7239929355783668e-06, + "loss": 0.004, + "step": 1816 + }, + { + "epoch": 4.42, + "learning_rate": 1.709459352645379e-06, + "loss": 0.0052, + "step": 1817 + }, + { + "epoch": 4.42, + "learning_rate": 1.694985120848258e-06, + "loss": 0.0054, + "step": 1818 + }, + { + "epoch": 4.43, + "learning_rate": 1.6805702770716053e-06, + "loss": 0.0044, + "step": 1819 + }, + { + "epoch": 4.43, + "learning_rate": 1.6662148580486702e-06, + "loss": 0.0044, + "step": 1820 + }, + { + "epoch": 4.43, + "learning_rate": 1.6519189003612767e-06, + "loss": 0.0045, + "step": 1821 + }, + { + "epoch": 4.43, + "learning_rate": 1.6376824404397251e-06, + "loss": 0.0045, + "step": 1822 + }, + { + "epoch": 4.44, + "learning_rate": 1.6235055145626953e-06, + "loss": 0.0045, + "step": 1823 + }, + { + "epoch": 4.44, + "learning_rate": 1.6093881588571501e-06, + "loss": 0.0058, + "step": 1824 + }, + { + "epoch": 4.44, + "learning_rate": 1.5953304092982624e-06, + "loss": 0.0047, + "step": 1825 + }, + { + "epoch": 4.44, + "learning_rate": 1.581332301709304e-06, + "loss": 0.005, + "step": 1826 + }, + { + "epoch": 4.45, + "learning_rate": 1.5673938717615605e-06, + "loss": 0.0065, + "step": 1827 + }, + { + "epoch": 4.45, + "learning_rate": 1.5535151549742528e-06, + "loss": 0.0043, + "step": 1828 + }, + { + "epoch": 4.45, + "learning_rate": 1.5396961867144206e-06, + "loss": 0.0044, + "step": 1829 + }, + { + "epoch": 4.45, + "learning_rate": 1.525937002196845e-06, + "loss": 0.004, + "step": 1830 + }, + { + "epoch": 4.46, + "learning_rate": 1.512237636483982e-06, + "loss": 0.0058, + "step": 1831 + }, + { + "epoch": 4.46, + "learning_rate": 1.4985981244858254e-06, + "loss": 0.0032, + "step": 1832 + }, + { + "epoch": 4.46, + "learning_rate": 1.4850185009598645e-06, + "loss": 0.0058, + "step": 1833 + }, + { + "epoch": 4.46, + "learning_rate": 1.471498800510962e-06, + "loss": 0.0051, + "step": 1834 + }, + { + "epoch": 4.47, + "learning_rate": 1.4580390575912872e-06, + "loss": 0.005, + "step": 1835 + }, + { + "epoch": 4.47, + "learning_rate": 1.4446393065002144e-06, + "loss": 0.0044, + "step": 1836 + }, + { + "epoch": 4.47, + "learning_rate": 1.431299581384249e-06, + "loss": 0.005, + "step": 1837 + }, + { + "epoch": 4.47, + "learning_rate": 1.4180199162369207e-06, + "loss": 0.0053, + "step": 1838 + }, + { + "epoch": 4.48, + "learning_rate": 1.4048003448987213e-06, + "loss": 0.0044, + "step": 1839 + }, + { + "epoch": 4.48, + "learning_rate": 1.3916409010569926e-06, + "loss": 0.0061, + "step": 1840 + }, + { + "epoch": 4.48, + "learning_rate": 1.378541618245871e-06, + "loss": 0.0043, + "step": 1841 + }, + { + "epoch": 4.48, + "learning_rate": 1.365502529846166e-06, + "loss": 0.0038, + "step": 1842 + }, + { + "epoch": 4.49, + "learning_rate": 1.3525236690853093e-06, + "loss": 0.0038, + "step": 1843 + }, + { + "epoch": 4.49, + "learning_rate": 1.3396050690372418e-06, + "loss": 0.0042, + "step": 1844 + }, + { + "epoch": 4.49, + "learning_rate": 1.3267467626223606e-06, + "loss": 0.0058, + "step": 1845 + }, + { + "epoch": 4.49, + "learning_rate": 1.3139487826073937e-06, + "loss": 0.0041, + "step": 1846 + }, + { + "epoch": 4.5, + "learning_rate": 1.3012111616053618e-06, + "loss": 0.0047, + "step": 1847 + }, + { + "epoch": 4.5, + "learning_rate": 1.288533932075453e-06, + "loss": 0.0047, + "step": 1848 + }, + { + "epoch": 4.5, + "learning_rate": 1.2759171263229813e-06, + "loss": 0.0043, + "step": 1849 + }, + { + "epoch": 4.5, + "learning_rate": 1.2633607764992671e-06, + "loss": 0.0044, + "step": 1850 + }, + { + "epoch": 4.51, + "learning_rate": 1.250864914601571e-06, + "loss": 0.0059, + "step": 1851 + }, + { + "epoch": 4.51, + "learning_rate": 1.2384295724730266e-06, + "loss": 0.0062, + "step": 1852 + }, + { + "epoch": 4.51, + "learning_rate": 1.2260547818025326e-06, + "loss": 0.0041, + "step": 1853 + }, + { + "epoch": 4.51, + "learning_rate": 1.2137405741246916e-06, + "loss": 0.0065, + "step": 1854 + }, + { + "epoch": 4.51, + "learning_rate": 1.201486980819716e-06, + "loss": 0.0047, + "step": 1855 + }, + { + "epoch": 4.52, + "learning_rate": 1.1892940331133612e-06, + "loss": 0.0041, + "step": 1856 + }, + { + "epoch": 4.52, + "learning_rate": 1.1771617620768394e-06, + "loss": 0.0051, + "step": 1857 + }, + { + "epoch": 4.52, + "learning_rate": 1.1650901986267365e-06, + "loss": 0.0042, + "step": 1858 + }, + { + "epoch": 4.52, + "learning_rate": 1.1530793735249458e-06, + "loss": 0.0048, + "step": 1859 + }, + { + "epoch": 4.53, + "learning_rate": 1.1411293173785726e-06, + "loss": 0.0042, + "step": 1860 + }, + { + "epoch": 4.53, + "learning_rate": 1.1292400606398635e-06, + "loss": 0.0034, + "step": 1861 + }, + { + "epoch": 4.53, + "learning_rate": 1.1174116336061468e-06, + "loss": 0.005, + "step": 1862 + }, + { + "epoch": 4.53, + "learning_rate": 1.1056440664197144e-06, + "loss": 0.0053, + "step": 1863 + }, + { + "epoch": 4.54, + "learning_rate": 1.0939373890677923e-06, + "loss": 0.0043, + "step": 1864 + }, + { + "epoch": 4.54, + "learning_rate": 1.0822916313824316e-06, + "loss": 0.0046, + "step": 1865 + }, + { + "epoch": 4.54, + "learning_rate": 1.0707068230404404e-06, + "loss": 0.0041, + "step": 1866 + }, + { + "epoch": 4.54, + "learning_rate": 1.059182993563304e-06, + "loss": 0.0043, + "step": 1867 + }, + { + "epoch": 4.55, + "learning_rate": 1.0477201723171377e-06, + "loss": 0.0052, + "step": 1868 + }, + { + "epoch": 4.55, + "learning_rate": 1.036318388512561e-06, + "loss": 0.004, + "step": 1869 + }, + { + "epoch": 4.55, + "learning_rate": 1.0249776712046744e-06, + "loss": 0.0045, + "step": 1870 + }, + { + "epoch": 4.55, + "learning_rate": 1.0136980492929605e-06, + "loss": 0.0043, + "step": 1871 + }, + { + "epoch": 4.56, + "learning_rate": 1.0024795515211988e-06, + "loss": 0.0048, + "step": 1872 + }, + { + "epoch": 4.56, + "learning_rate": 9.913222064774157e-07, + "loss": 0.0039, + "step": 1873 + }, + { + "epoch": 4.56, + "learning_rate": 9.802260425938099e-07, + "loss": 0.0051, + "step": 1874 + }, + { + "epoch": 4.56, + "learning_rate": 9.691910881466564e-07, + "loss": 0.0039, + "step": 1875 + }, + { + "epoch": 4.57, + "learning_rate": 9.58217371256262e-07, + "loss": 0.0055, + "step": 1876 + }, + { + "epoch": 4.57, + "learning_rate": 9.473049198868822e-07, + "loss": 0.0047, + "step": 1877 + }, + { + "epoch": 4.57, + "learning_rate": 9.364537618466451e-07, + "loss": 0.0047, + "step": 1878 + }, + { + "epoch": 4.57, + "learning_rate": 9.25663924787487e-07, + "loss": 0.0053, + "step": 1879 + }, + { + "epoch": 4.58, + "learning_rate": 9.149354362050805e-07, + "loss": 0.0037, + "step": 1880 + }, + { + "epoch": 4.58, + "learning_rate": 9.042683234387645e-07, + "loss": 0.0044, + "step": 1881 + }, + { + "epoch": 4.58, + "learning_rate": 8.936626136714754e-07, + "loss": 0.0058, + "step": 1882 + }, + { + "epoch": 4.58, + "learning_rate": 8.831183339296751e-07, + "loss": 0.0045, + "step": 1883 + }, + { + "epoch": 4.59, + "learning_rate": 8.726355110832862e-07, + "loss": 0.0049, + "step": 1884 + }, + { + "epoch": 4.59, + "learning_rate": 8.622141718456128e-07, + "loss": 0.0042, + "step": 1885 + }, + { + "epoch": 4.59, + "learning_rate": 8.51854342773295e-07, + "loss": 0.005, + "step": 1886 + }, + { + "epoch": 4.59, + "learning_rate": 8.415560502662151e-07, + "loss": 0.008, + "step": 1887 + }, + { + "epoch": 4.6, + "learning_rate": 8.313193205674391e-07, + "loss": 0.0055, + "step": 1888 + }, + { + "epoch": 4.6, + "learning_rate": 8.211441797631752e-07, + "loss": 0.004, + "step": 1889 + }, + { + "epoch": 4.6, + "learning_rate": 8.110306537826601e-07, + "loss": 0.0051, + "step": 1890 + }, + { + "epoch": 4.6, + "learning_rate": 8.009787683981279e-07, + "loss": 0.0055, + "step": 1891 + }, + { + "epoch": 4.6, + "learning_rate": 7.909885492247359e-07, + "loss": 0.0037, + "step": 1892 + }, + { + "epoch": 4.61, + "learning_rate": 7.81060021720495e-07, + "loss": 0.0039, + "step": 1893 + }, + { + "epoch": 4.61, + "learning_rate": 7.711932111862025e-07, + "loss": 0.0047, + "step": 1894 + }, + { + "epoch": 4.61, + "learning_rate": 7.613881427654013e-07, + "loss": 0.0039, + "step": 1895 + }, + { + "epoch": 4.61, + "learning_rate": 7.516448414442739e-07, + "loss": 0.0035, + "step": 1896 + }, + { + "epoch": 4.62, + "learning_rate": 7.419633320516178e-07, + "loss": 0.0054, + "step": 1897 + }, + { + "epoch": 4.62, + "learning_rate": 7.32343639258759e-07, + "loss": 0.0055, + "step": 1898 + }, + { + "epoch": 4.62, + "learning_rate": 7.227857875795025e-07, + "loss": 0.0049, + "step": 1899 + }, + { + "epoch": 4.62, + "learning_rate": 7.13289801370054e-07, + "loss": 0.0048, + "step": 1900 + }, + { + "epoch": 4.63, + "learning_rate": 7.038557048289818e-07, + "loss": 0.004, + "step": 1901 + }, + { + "epoch": 4.63, + "learning_rate": 6.944835219971329e-07, + "loss": 0.0051, + "step": 1902 + }, + { + "epoch": 4.63, + "learning_rate": 6.851732767575752e-07, + "loss": 0.0037, + "step": 1903 + }, + { + "epoch": 4.63, + "learning_rate": 6.759249928355554e-07, + "loss": 0.0045, + "step": 1904 + }, + { + "epoch": 4.64, + "learning_rate": 6.667386937984105e-07, + "loss": 0.0055, + "step": 1905 + }, + { + "epoch": 4.64, + "learning_rate": 6.576144030555259e-07, + "loss": 0.0039, + "step": 1906 + }, + { + "epoch": 4.64, + "learning_rate": 6.485521438582748e-07, + "loss": 0.0044, + "step": 1907 + }, + { + "epoch": 4.64, + "learning_rate": 6.395519392999621e-07, + "loss": 0.0048, + "step": 1908 + }, + { + "epoch": 4.65, + "learning_rate": 6.30613812315739e-07, + "loss": 0.0053, + "step": 1909 + }, + { + "epoch": 4.65, + "learning_rate": 6.217377856825885e-07, + "loss": 0.0057, + "step": 1910 + }, + { + "epoch": 4.65, + "learning_rate": 6.129238820192285e-07, + "loss": 0.0045, + "step": 1911 + }, + { + "epoch": 4.65, + "learning_rate": 6.041721237860676e-07, + "loss": 0.0047, + "step": 1912 + }, + { + "epoch": 4.66, + "learning_rate": 5.954825332851632e-07, + "loss": 0.0055, + "step": 1913 + }, + { + "epoch": 4.66, + "learning_rate": 5.868551326601413e-07, + "loss": 0.0037, + "step": 1914 + }, + { + "epoch": 4.66, + "learning_rate": 5.782899438961487e-07, + "loss": 0.0054, + "step": 1915 + }, + { + "epoch": 4.66, + "learning_rate": 5.697869888198065e-07, + "loss": 0.0041, + "step": 1916 + }, + { + "epoch": 4.67, + "learning_rate": 5.613462890991378e-07, + "loss": 0.0047, + "step": 1917 + }, + { + "epoch": 4.67, + "learning_rate": 5.529678662435228e-07, + "loss": 0.0028, + "step": 1918 + }, + { + "epoch": 4.67, + "learning_rate": 5.446517416036412e-07, + "loss": 0.0041, + "step": 1919 + }, + { + "epoch": 4.67, + "learning_rate": 5.363979363714245e-07, + "loss": 0.0039, + "step": 1920 + }, + { + "epoch": 4.68, + "learning_rate": 5.282064715799895e-07, + "loss": 0.0046, + "step": 1921 + }, + { + "epoch": 4.68, + "learning_rate": 5.20077368103597e-07, + "loss": 0.0038, + "step": 1922 + }, + { + "epoch": 4.68, + "learning_rate": 5.120106466575875e-07, + "loss": 0.0053, + "step": 1923 + }, + { + "epoch": 4.68, + "learning_rate": 5.040063277983287e-07, + "loss": 0.0039, + "step": 1924 + }, + { + "epoch": 4.69, + "learning_rate": 4.96064431923185e-07, + "loss": 0.0053, + "step": 1925 + }, + { + "epoch": 4.69, + "learning_rate": 4.881849792704368e-07, + "loss": 0.0041, + "step": 1926 + }, + { + "epoch": 4.69, + "learning_rate": 4.803679899192392e-07, + "loss": 0.0042, + "step": 1927 + }, + { + "epoch": 4.69, + "learning_rate": 4.7261348378958016e-07, + "loss": 0.0048, + "step": 1928 + }, + { + "epoch": 4.69, + "learning_rate": 4.649214806422164e-07, + "loss": 0.0068, + "step": 1929 + }, + { + "epoch": 4.7, + "learning_rate": 4.5729200007862683e-07, + "loss": 0.0053, + "step": 1930 + }, + { + "epoch": 4.7, + "learning_rate": 4.497250615409732e-07, + "loss": 0.004, + "step": 1931 + }, + { + "epoch": 4.7, + "learning_rate": 4.4222068431203634e-07, + "loss": 0.0038, + "step": 1932 + }, + { + "epoch": 4.7, + "learning_rate": 4.34778887515172e-07, + "loss": 0.005, + "step": 1933 + }, + { + "epoch": 4.71, + "learning_rate": 4.2739969011426074e-07, + "loss": 0.0055, + "step": 1934 + }, + { + "epoch": 4.71, + "learning_rate": 4.2008311091366606e-07, + "loss": 0.0051, + "step": 1935 + }, + { + "epoch": 4.71, + "learning_rate": 4.128291685581792e-07, + "loss": 0.0055, + "step": 1936 + }, + { + "epoch": 4.71, + "learning_rate": 4.0563788153297755e-07, + "loss": 0.0043, + "step": 1937 + }, + { + "epoch": 4.72, + "learning_rate": 3.9850926816357157e-07, + "loss": 0.004, + "step": 1938 + }, + { + "epoch": 4.72, + "learning_rate": 3.9144334661576074e-07, + "loss": 0.0039, + "step": 1939 + }, + { + "epoch": 4.72, + "learning_rate": 3.8444013489558337e-07, + "loss": 0.0042, + "step": 1940 + }, + { + "epoch": 4.72, + "learning_rate": 3.774996508492834e-07, + "loss": 0.0054, + "step": 1941 + }, + { + "epoch": 4.73, + "learning_rate": 3.70621912163252e-07, + "loss": 0.0062, + "step": 1942 + }, + { + "epoch": 4.73, + "learning_rate": 3.6380693636398343e-07, + "loss": 0.0051, + "step": 1943 + }, + { + "epoch": 4.73, + "learning_rate": 3.570547408180441e-07, + "loss": 0.0043, + "step": 1944 + }, + { + "epoch": 4.73, + "learning_rate": 3.503653427320036e-07, + "loss": 0.0035, + "step": 1945 + }, + { + "epoch": 4.74, + "learning_rate": 3.4373875915241493e-07, + "loss": 0.0057, + "step": 1946 + }, + { + "epoch": 4.74, + "learning_rate": 3.371750069657592e-07, + "loss": 0.0045, + "step": 1947 + }, + { + "epoch": 4.74, + "learning_rate": 3.306741028984012e-07, + "loss": 0.0058, + "step": 1948 + }, + { + "epoch": 4.74, + "learning_rate": 3.242360635165559e-07, + "loss": 0.0044, + "step": 1949 + }, + { + "epoch": 4.75, + "learning_rate": 3.1786090522624156e-07, + "loss": 0.0047, + "step": 1950 + }, + { + "epoch": 4.75, + "learning_rate": 3.1154864427322685e-07, + "loss": 0.0034, + "step": 1951 + }, + { + "epoch": 4.75, + "learning_rate": 3.052992967430085e-07, + "loss": 0.0035, + "step": 1952 + }, + { + "epoch": 4.75, + "learning_rate": 2.991128785607589e-07, + "loss": 0.0039, + "step": 1953 + }, + { + "epoch": 4.76, + "learning_rate": 2.9298940549128964e-07, + "loss": 0.0039, + "step": 1954 + }, + { + "epoch": 4.76, + "learning_rate": 2.8692889313900186e-07, + "loss": 0.0047, + "step": 1955 + }, + { + "epoch": 4.76, + "learning_rate": 2.8093135694786667e-07, + "loss": 0.0056, + "step": 1956 + }, + { + "epoch": 4.76, + "learning_rate": 2.749968122013669e-07, + "loss": 0.0051, + "step": 1957 + }, + { + "epoch": 4.77, + "learning_rate": 2.6912527402246367e-07, + "loss": 0.0043, + "step": 1958 + }, + { + "epoch": 4.77, + "learning_rate": 2.633167573735579e-07, + "loss": 0.0054, + "step": 1959 + }, + { + "epoch": 4.77, + "learning_rate": 2.575712770564592e-07, + "loss": 0.0035, + "step": 1960 + }, + { + "epoch": 4.77, + "learning_rate": 2.5188884771233656e-07, + "loss": 0.0043, + "step": 1961 + }, + { + "epoch": 4.78, + "learning_rate": 2.4626948382168726e-07, + "loss": 0.0038, + "step": 1962 + }, + { + "epoch": 4.78, + "learning_rate": 2.407131997043038e-07, + "loss": 0.0039, + "step": 1963 + }, + { + "epoch": 4.78, + "learning_rate": 2.3522000951922417e-07, + "loss": 0.0031, + "step": 1964 + }, + { + "epoch": 4.78, + "learning_rate": 2.2978992726471748e-07, + "loss": 0.0045, + "step": 1965 + }, + { + "epoch": 4.78, + "learning_rate": 2.244229667782205e-07, + "loss": 0.0081, + "step": 1966 + }, + { + "epoch": 4.79, + "learning_rate": 2.1911914173632643e-07, + "loss": 0.0046, + "step": 1967 + }, + { + "epoch": 4.79, + "learning_rate": 2.1387846565474045e-07, + "loss": 0.0048, + "step": 1968 + }, + { + "epoch": 4.79, + "learning_rate": 2.08700951888241e-07, + "loss": 0.0039, + "step": 1969 + }, + { + "epoch": 4.79, + "learning_rate": 2.0358661363065746e-07, + "loss": 0.0042, + "step": 1970 + }, + { + "epoch": 4.8, + "learning_rate": 1.985354639148229e-07, + "loss": 0.0051, + "step": 1971 + }, + { + "epoch": 4.8, + "learning_rate": 1.9354751561254937e-07, + "loss": 0.004, + "step": 1972 + }, + { + "epoch": 4.8, + "learning_rate": 1.8862278143459144e-07, + "loss": 0.0043, + "step": 1973 + }, + { + "epoch": 4.8, + "learning_rate": 1.8376127393062158e-07, + "loss": 0.0051, + "step": 1974 + }, + { + "epoch": 4.81, + "learning_rate": 1.7896300548918832e-07, + "loss": 0.0058, + "step": 1975 + }, + { + "epoch": 4.81, + "learning_rate": 1.7422798833768572e-07, + "loss": 0.004, + "step": 1976 + }, + { + "epoch": 4.81, + "learning_rate": 1.6955623454233128e-07, + "loss": 0.0051, + "step": 1977 + }, + { + "epoch": 4.81, + "learning_rate": 1.6494775600812417e-07, + "loss": 0.0048, + "step": 1978 + }, + { + "epoch": 4.82, + "learning_rate": 1.6040256447881763e-07, + "loss": 0.0056, + "step": 1979 + }, + { + "epoch": 4.82, + "learning_rate": 1.559206715368966e-07, + "loss": 0.0039, + "step": 1980 + }, + { + "epoch": 4.82, + "learning_rate": 1.5150208860354176e-07, + "loss": 0.0053, + "step": 1981 + }, + { + "epoch": 4.82, + "learning_rate": 1.4714682693859617e-07, + "loss": 0.0039, + "step": 1982 + }, + { + "epoch": 4.83, + "learning_rate": 1.428548976405486e-07, + "loss": 0.0058, + "step": 1983 + }, + { + "epoch": 4.83, + "learning_rate": 1.3862631164649475e-07, + "loss": 0.0039, + "step": 1984 + }, + { + "epoch": 4.83, + "learning_rate": 1.344610797321122e-07, + "loss": 0.0048, + "step": 1985 + }, + { + "epoch": 4.83, + "learning_rate": 1.3035921251163263e-07, + "loss": 0.0039, + "step": 1986 + }, + { + "epoch": 4.84, + "learning_rate": 1.2632072043782252e-07, + "loss": 0.0032, + "step": 1987 + }, + { + "epoch": 4.84, + "learning_rate": 1.223456138019413e-07, + "loss": 0.0042, + "step": 1988 + }, + { + "epoch": 4.84, + "learning_rate": 1.1843390273373057e-07, + "loss": 0.0056, + "step": 1989 + }, + { + "epoch": 4.84, + "learning_rate": 1.1458559720137762e-07, + "loss": 0.0042, + "step": 1990 + }, + { + "epoch": 4.85, + "learning_rate": 1.1080070701149359e-07, + "loss": 0.004, + "step": 1991 + }, + { + "epoch": 4.85, + "learning_rate": 1.0707924180909379e-07, + "loss": 0.0047, + "step": 1992 + }, + { + "epoch": 4.85, + "learning_rate": 1.0342121107755898e-07, + "loss": 0.0057, + "step": 1993 + }, + { + "epoch": 4.85, + "learning_rate": 9.982662413862975e-08, + "loss": 0.0046, + "step": 1994 + }, + { + "epoch": 4.86, + "learning_rate": 9.629549015237049e-08, + "loss": 0.0037, + "step": 1995 + }, + { + "epoch": 4.86, + "learning_rate": 9.282781811714159e-08, + "loss": 0.0055, + "step": 1996 + }, + { + "epoch": 4.86, + "learning_rate": 8.94236168695911e-08, + "loss": 0.004, + "step": 1997 + }, + { + "epoch": 4.86, + "learning_rate": 8.608289508462708e-08, + "loss": 0.0036, + "step": 1998 + }, + { + "epoch": 4.87, + "learning_rate": 8.280566127538691e-08, + "loss": 0.0047, + "step": 1999 + }, + { + "epoch": 4.87, + "learning_rate": 7.959192379322077e-08, + "loss": 0.0043, + "step": 2000 + }, + { + "epoch": 4.87, + "learning_rate": 7.644169082768326e-08, + "loss": 0.0043, + "step": 2001 + }, + { + "epoch": 4.87, + "learning_rate": 7.335497040648898e-08, + "loss": 0.004, + "step": 2002 + }, + { + "epoch": 4.87, + "learning_rate": 7.033177039550698e-08, + "loss": 0.0037, + "step": 2003 + }, + { + "epoch": 4.88, + "learning_rate": 6.73720984987386e-08, + "loss": 0.0051, + "step": 2004 + }, + { + "epoch": 4.88, + "learning_rate": 6.4475962258298e-08, + "loss": 0.0046, + "step": 2005 + }, + { + "epoch": 4.88, + "learning_rate": 6.164336905438994e-08, + "loss": 0.005, + "step": 2006 + }, + { + "epoch": 4.88, + "learning_rate": 5.8874326105293196e-08, + "loss": 0.0045, + "step": 2007 + }, + { + "epoch": 4.89, + "learning_rate": 5.616884046734383e-08, + "loss": 0.0051, + "step": 2008 + }, + { + "epoch": 4.89, + "learning_rate": 5.352691903491303e-08, + "loss": 0.0046, + "step": 2009 + }, + { + "epoch": 4.89, + "learning_rate": 5.094856854039043e-08, + "loss": 0.0049, + "step": 2010 + }, + { + "epoch": 4.89, + "learning_rate": 4.8433795554173046e-08, + "loss": 0.0037, + "step": 2011 + }, + { + "epoch": 4.9, + "learning_rate": 4.598260648463748e-08, + "loss": 0.0039, + "step": 2012 + }, + { + "epoch": 4.9, + "learning_rate": 4.359500757813717e-08, + "loss": 0.0041, + "step": 2013 + }, + { + "epoch": 4.9, + "learning_rate": 4.1271004918971847e-08, + "loss": 0.004, + "step": 2014 + }, + { + "epoch": 4.9, + "learning_rate": 3.901060442938198e-08, + "loss": 0.0056, + "step": 2015 + }, + { + "epoch": 4.91, + "learning_rate": 3.68138118695377e-08, + "loss": 0.0049, + "step": 2016 + }, + { + "epoch": 4.91, + "learning_rate": 3.468063283750267e-08, + "loss": 0.0051, + "step": 2017 + }, + { + "epoch": 4.91, + "learning_rate": 3.2611072769250795e-08, + "loss": 0.0039, + "step": 2018 + }, + { + "epoch": 4.91, + "learning_rate": 3.0605136938624544e-08, + "loss": 0.004, + "step": 2019 + }, + { + "epoch": 4.92, + "learning_rate": 2.866283045734053e-08, + "loss": 0.0045, + "step": 2020 + }, + { + "epoch": 4.92, + "learning_rate": 2.6784158274964498e-08, + "loss": 0.0063, + "step": 2021 + }, + { + "epoch": 4.92, + "learning_rate": 2.496912517890304e-08, + "loss": 0.0056, + "step": 2022 + }, + { + "epoch": 4.92, + "learning_rate": 2.3217735794392458e-08, + "loss": 0.0039, + "step": 2023 + }, + { + "epoch": 4.93, + "learning_rate": 2.152999458449323e-08, + "loss": 0.004, + "step": 2024 + }, + { + "epoch": 4.93, + "learning_rate": 1.990590585005947e-08, + "loss": 0.0045, + "step": 2025 + }, + { + "epoch": 4.93, + "learning_rate": 1.834547372975004e-08, + "loss": 0.0046, + "step": 2026 + }, + { + "epoch": 4.93, + "learning_rate": 1.6848702200000786e-08, + "loss": 0.0051, + "step": 2027 + }, + { + "epoch": 4.94, + "learning_rate": 1.5415595075027324e-08, + "loss": 0.0054, + "step": 2028 + }, + { + "epoch": 4.94, + "learning_rate": 1.4046156006808364e-08, + "loss": 0.0048, + "step": 2029 + }, + { + "epoch": 4.94, + "learning_rate": 1.2740388485071863e-08, + "loss": 0.0041, + "step": 2030 + }, + { + "epoch": 4.94, + "learning_rate": 1.149829583730333e-08, + "loss": 0.0048, + "step": 2031 + }, + { + "epoch": 4.95, + "learning_rate": 1.03198812287153e-08, + "loss": 0.0045, + "step": 2032 + }, + { + "epoch": 4.95, + "learning_rate": 9.20514766225289e-09, + "loss": 0.0046, + "step": 2033 + }, + { + "epoch": 4.95, + "learning_rate": 8.154097978591014e-09, + "loss": 0.0039, + "step": 2034 + }, + { + "epoch": 4.95, + "learning_rate": 7.166734856103863e-09, + "loss": 0.0044, + "step": 2035 + }, + { + "epoch": 4.96, + "learning_rate": 6.243060810892654e-09, + "loss": 0.0043, + "step": 2036 + }, + { + "epoch": 4.96, + "learning_rate": 5.3830781967412205e-09, + "loss": 0.006, + "step": 2037 + }, + { + "epoch": 4.96, + "learning_rate": 4.586789205140995e-09, + "loss": 0.004, + "step": 2038 + }, + { + "epoch": 4.96, + "learning_rate": 3.854195865271582e-09, + "loss": 0.0048, + "step": 2039 + }, + { + "epoch": 4.97, + "learning_rate": 3.1853000439951987e-09, + "loss": 0.0054, + "step": 2040 + }, + { + "epoch": 4.97, + "learning_rate": 2.58010344585391e-09, + "loss": 0.0038, + "step": 2041 + }, + { + "epoch": 4.97, + "learning_rate": 2.038607613066845e-09, + "loss": 0.0041, + "step": 2042 + }, + { + "epoch": 4.97, + "learning_rate": 1.5608139255246512e-09, + "loss": 0.0047, + "step": 2043 + }, + { + "epoch": 4.97, + "learning_rate": 1.1467236007867144e-09, + "loss": 0.0036, + "step": 2044 + }, + { + "epoch": 4.98, + "learning_rate": 7.963376940728351e-10, + "loss": 0.0051, + "step": 2045 + }, + { + "epoch": 4.98, + "learning_rate": 5.096570982743298e-10, + "loss": 0.0049, + "step": 2046 + }, + { + "epoch": 4.98, + "learning_rate": 2.866825439346021e-10, + "loss": 0.004, + "step": 2047 + }, + { + "epoch": 4.98, + "learning_rate": 1.2741459925746935e-10, + "loss": 0.0057, + "step": 2048 + }, + { + "epoch": 4.99, + "learning_rate": 3.185367010716256e-11, + "loss": 0.004, + "step": 2049 + }, + { + "epoch": 4.99, + "learning_rate": 0.0, + "loss": 0.0058, + "step": 2050 + }, + { + "epoch": 4.99, + "eval_loss": 1.1227930784225464, + "eval_runtime": 115.4648, + "eval_samples_per_second": 6.599, + "eval_steps_per_second": 0.416, + "step": 2050 + }, + { + "epoch": 4.99, + "step": 2050, + "total_flos": 2.0983989578550477e+19, + "train_loss": 0.1699243627804354, + "train_runtime": 174534.9388, + "train_samples_per_second": 1.506, + "train_steps_per_second": 0.012 + } + ], + "max_steps": 2050, + "num_train_epochs": 5, + "total_flos": 2.0983989578550477e+19, + "trial_name": null, + "trial_params": null +}