{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999474154703686, "eval_steps": 500, "global_step": 4754, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.7972027972027973e-07, "loss": 3.7565, "step": 1 }, { "epoch": 0.0, "learning_rate": 5.594405594405595e-07, "loss": 3.6338, "step": 2 }, { "epoch": 0.0, "learning_rate": 8.391608391608393e-07, "loss": 3.6721, "step": 3 }, { "epoch": 0.0, "learning_rate": 1.118881118881119e-06, "loss": 3.6397, "step": 4 }, { "epoch": 0.0, "learning_rate": 1.3986013986013987e-06, "loss": 3.7318, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.6783216783216785e-06, "loss": 3.3937, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.9580419580419583e-06, "loss": 3.173, "step": 7 }, { "epoch": 0.0, "learning_rate": 2.237762237762238e-06, "loss": 3.0256, "step": 8 }, { "epoch": 0.0, "learning_rate": 2.517482517482518e-06, "loss": 2.2789, "step": 9 }, { "epoch": 0.0, "learning_rate": 2.7972027972027974e-06, "loss": 2.1252, "step": 10 }, { "epoch": 0.0, "learning_rate": 3.0769230769230774e-06, "loss": 1.8772, "step": 11 }, { "epoch": 0.0, "learning_rate": 3.356643356643357e-06, "loss": 1.5638, "step": 12 }, { "epoch": 0.0, "learning_rate": 3.6363636363636366e-06, "loss": 1.5352, "step": 13 }, { "epoch": 0.0, "learning_rate": 3.916083916083917e-06, "loss": 1.4646, "step": 14 }, { "epoch": 0.0, "learning_rate": 4.195804195804197e-06, "loss": 1.4345, "step": 15 }, { "epoch": 0.0, "learning_rate": 4.475524475524476e-06, "loss": 1.4364, "step": 16 }, { "epoch": 0.0, "learning_rate": 4.755244755244756e-06, "loss": 1.4582, "step": 17 }, { "epoch": 0.0, "learning_rate": 5.034965034965036e-06, "loss": 1.4391, "step": 18 }, { "epoch": 0.0, "learning_rate": 5.314685314685315e-06, "loss": 1.4497, "step": 19 }, { "epoch": 0.0, "learning_rate": 5.594405594405595e-06, "loss": 1.45, "step": 20 }, { "epoch": 0.0, "learning_rate": 5.874125874125874e-06, "loss": 1.4419, "step": 21 }, { "epoch": 0.0, "learning_rate": 6.153846153846155e-06, "loss": 1.4565, "step": 22 }, { "epoch": 0.0, "learning_rate": 6.433566433566434e-06, "loss": 1.4548, "step": 23 }, { "epoch": 0.01, "learning_rate": 6.713286713286714e-06, "loss": 1.3943, "step": 24 }, { "epoch": 0.01, "learning_rate": 6.993006993006993e-06, "loss": 1.4182, "step": 25 }, { "epoch": 0.01, "learning_rate": 7.272727272727273e-06, "loss": 1.4243, "step": 26 }, { "epoch": 0.01, "learning_rate": 7.552447552447552e-06, "loss": 1.4116, "step": 27 }, { "epoch": 0.01, "learning_rate": 7.832167832167833e-06, "loss": 1.4093, "step": 28 }, { "epoch": 0.01, "learning_rate": 8.111888111888112e-06, "loss": 1.4619, "step": 29 }, { "epoch": 0.01, "learning_rate": 8.391608391608393e-06, "loss": 1.4145, "step": 30 }, { "epoch": 0.01, "learning_rate": 8.671328671328672e-06, "loss": 1.3979, "step": 31 }, { "epoch": 0.01, "learning_rate": 8.951048951048951e-06, "loss": 1.4465, "step": 32 }, { "epoch": 0.01, "learning_rate": 9.230769230769232e-06, "loss": 1.3406, "step": 33 }, { "epoch": 0.01, "learning_rate": 9.510489510489511e-06, "loss": 1.4325, "step": 34 }, { "epoch": 0.01, "learning_rate": 9.79020979020979e-06, "loss": 1.4771, "step": 35 }, { "epoch": 0.01, "learning_rate": 1.0069930069930071e-05, "loss": 1.3757, "step": 36 }, { "epoch": 0.01, "learning_rate": 1.034965034965035e-05, "loss": 1.4177, "step": 37 }, { "epoch": 0.01, "learning_rate": 1.062937062937063e-05, "loss": 1.3953, "step": 38 }, { "epoch": 0.01, "learning_rate": 1.0909090909090909e-05, "loss": 1.3675, "step": 39 }, { "epoch": 0.01, "learning_rate": 1.118881118881119e-05, "loss": 1.3516, "step": 40 }, { "epoch": 0.01, "learning_rate": 1.1468531468531469e-05, "loss": 1.3917, "step": 41 }, { "epoch": 0.01, "learning_rate": 1.1748251748251748e-05, "loss": 1.337, "step": 42 }, { "epoch": 0.01, "learning_rate": 1.2027972027972027e-05, "loss": 1.3242, "step": 43 }, { "epoch": 0.01, "learning_rate": 1.230769230769231e-05, "loss": 1.3318, "step": 44 }, { "epoch": 0.01, "learning_rate": 1.2587412587412589e-05, "loss": 1.4071, "step": 45 }, { "epoch": 0.01, "learning_rate": 1.2867132867132868e-05, "loss": 1.392, "step": 46 }, { "epoch": 0.01, "learning_rate": 1.3146853146853147e-05, "loss": 1.4016, "step": 47 }, { "epoch": 0.01, "learning_rate": 1.3426573426573428e-05, "loss": 1.3953, "step": 48 }, { "epoch": 0.01, "learning_rate": 1.3706293706293707e-05, "loss": 1.3797, "step": 49 }, { "epoch": 0.01, "learning_rate": 1.3986013986013986e-05, "loss": 1.3455, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.4265734265734267e-05, "loss": 1.4462, "step": 51 }, { "epoch": 0.01, "learning_rate": 1.4545454545454546e-05, "loss": 1.3536, "step": 52 }, { "epoch": 0.01, "learning_rate": 1.4825174825174825e-05, "loss": 1.4016, "step": 53 }, { "epoch": 0.01, "learning_rate": 1.5104895104895105e-05, "loss": 1.441, "step": 54 }, { "epoch": 0.01, "learning_rate": 1.5384615384615387e-05, "loss": 1.4135, "step": 55 }, { "epoch": 0.01, "learning_rate": 1.5664335664335666e-05, "loss": 1.4371, "step": 56 }, { "epoch": 0.01, "learning_rate": 1.5944055944055945e-05, "loss": 1.4199, "step": 57 }, { "epoch": 0.01, "learning_rate": 1.6223776223776225e-05, "loss": 1.373, "step": 58 }, { "epoch": 0.01, "learning_rate": 1.6503496503496507e-05, "loss": 1.4099, "step": 59 }, { "epoch": 0.01, "learning_rate": 1.6783216783216786e-05, "loss": 1.3375, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.7062937062937065e-05, "loss": 1.3789, "step": 61 }, { "epoch": 0.01, "learning_rate": 1.7342657342657345e-05, "loss": 1.4144, "step": 62 }, { "epoch": 0.01, "learning_rate": 1.7622377622377624e-05, "loss": 1.3711, "step": 63 }, { "epoch": 0.01, "learning_rate": 1.7902097902097903e-05, "loss": 1.4263, "step": 64 }, { "epoch": 0.01, "learning_rate": 1.8181818181818182e-05, "loss": 1.3606, "step": 65 }, { "epoch": 0.01, "learning_rate": 1.8461538461538465e-05, "loss": 1.3801, "step": 66 }, { "epoch": 0.01, "learning_rate": 1.8741258741258744e-05, "loss": 1.414, "step": 67 }, { "epoch": 0.01, "learning_rate": 1.9020979020979023e-05, "loss": 1.3674, "step": 68 }, { "epoch": 0.01, "learning_rate": 1.9300699300699302e-05, "loss": 1.4363, "step": 69 }, { "epoch": 0.01, "learning_rate": 1.958041958041958e-05, "loss": 1.3421, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.986013986013986e-05, "loss": 1.4186, "step": 71 }, { "epoch": 0.02, "learning_rate": 2.0139860139860143e-05, "loss": 1.7448, "step": 72 }, { "epoch": 0.02, "learning_rate": 2.0419580419580422e-05, "loss": 1.3815, "step": 73 }, { "epoch": 0.02, "learning_rate": 2.06993006993007e-05, "loss": 1.3493, "step": 74 }, { "epoch": 0.02, "learning_rate": 2.097902097902098e-05, "loss": 1.3953, "step": 75 }, { "epoch": 0.02, "learning_rate": 2.125874125874126e-05, "loss": 1.4071, "step": 76 }, { "epoch": 0.02, "learning_rate": 2.153846153846154e-05, "loss": 1.3465, "step": 77 }, { "epoch": 0.02, "learning_rate": 2.1818181818181818e-05, "loss": 1.4073, "step": 78 }, { "epoch": 0.02, "learning_rate": 2.2097902097902097e-05, "loss": 1.4047, "step": 79 }, { "epoch": 0.02, "learning_rate": 2.237762237762238e-05, "loss": 1.3887, "step": 80 }, { "epoch": 0.02, "learning_rate": 2.265734265734266e-05, "loss": 1.3287, "step": 81 }, { "epoch": 0.02, "learning_rate": 2.2937062937062938e-05, "loss": 1.3959, "step": 82 }, { "epoch": 0.02, "learning_rate": 2.3216783216783217e-05, "loss": 1.3785, "step": 83 }, { "epoch": 0.02, "learning_rate": 2.3496503496503496e-05, "loss": 1.4571, "step": 84 }, { "epoch": 0.02, "learning_rate": 2.3776223776223775e-05, "loss": 1.3935, "step": 85 }, { "epoch": 0.02, "learning_rate": 2.4055944055944054e-05, "loss": 1.4453, "step": 86 }, { "epoch": 0.02, "learning_rate": 2.433566433566434e-05, "loss": 1.3951, "step": 87 }, { "epoch": 0.02, "learning_rate": 2.461538461538462e-05, "loss": 1.3483, "step": 88 }, { "epoch": 0.02, "learning_rate": 2.48951048951049e-05, "loss": 1.3968, "step": 89 }, { "epoch": 0.02, "learning_rate": 2.5174825174825178e-05, "loss": 1.3972, "step": 90 }, { "epoch": 0.02, "learning_rate": 2.5454545454545457e-05, "loss": 1.4067, "step": 91 }, { "epoch": 0.02, "learning_rate": 2.5734265734265736e-05, "loss": 1.428, "step": 92 }, { "epoch": 0.02, "learning_rate": 2.6013986013986015e-05, "loss": 1.4095, "step": 93 }, { "epoch": 0.02, "learning_rate": 2.6293706293706294e-05, "loss": 1.4353, "step": 94 }, { "epoch": 0.02, "learning_rate": 2.6573426573426577e-05, "loss": 1.2736, "step": 95 }, { "epoch": 0.02, "learning_rate": 2.6853146853146856e-05, "loss": 1.4096, "step": 96 }, { "epoch": 0.02, "learning_rate": 2.7132867132867135e-05, "loss": 1.3583, "step": 97 }, { "epoch": 0.02, "learning_rate": 2.7412587412587414e-05, "loss": 1.3911, "step": 98 }, { "epoch": 0.02, "learning_rate": 2.7692307692307694e-05, "loss": 1.3625, "step": 99 }, { "epoch": 0.02, "learning_rate": 2.7972027972027973e-05, "loss": 1.3575, "step": 100 }, { "epoch": 0.02, "learning_rate": 2.8251748251748252e-05, "loss": 1.2927, "step": 101 }, { "epoch": 0.02, "learning_rate": 2.8531468531468534e-05, "loss": 1.3838, "step": 102 }, { "epoch": 0.02, "learning_rate": 2.8811188811188814e-05, "loss": 1.3662, "step": 103 }, { "epoch": 0.02, "learning_rate": 2.9090909090909093e-05, "loss": 1.3649, "step": 104 }, { "epoch": 0.02, "learning_rate": 2.9370629370629372e-05, "loss": 1.3942, "step": 105 }, { "epoch": 0.02, "learning_rate": 2.965034965034965e-05, "loss": 1.4093, "step": 106 }, { "epoch": 0.02, "learning_rate": 2.993006993006993e-05, "loss": 1.3986, "step": 107 }, { "epoch": 0.02, "learning_rate": 3.020979020979021e-05, "loss": 1.3586, "step": 108 }, { "epoch": 0.02, "learning_rate": 3.048951048951049e-05, "loss": 1.4076, "step": 109 }, { "epoch": 0.02, "learning_rate": 3.0769230769230774e-05, "loss": 1.3857, "step": 110 }, { "epoch": 0.02, "learning_rate": 3.104895104895105e-05, "loss": 1.3899, "step": 111 }, { "epoch": 0.02, "learning_rate": 3.132867132867133e-05, "loss": 1.3579, "step": 112 }, { "epoch": 0.02, "learning_rate": 3.160839160839161e-05, "loss": 1.3288, "step": 113 }, { "epoch": 0.02, "learning_rate": 3.188811188811189e-05, "loss": 1.3219, "step": 114 }, { "epoch": 0.02, "learning_rate": 3.216783216783217e-05, "loss": 1.4371, "step": 115 }, { "epoch": 0.02, "learning_rate": 3.244755244755245e-05, "loss": 1.4103, "step": 116 }, { "epoch": 0.02, "learning_rate": 3.272727272727273e-05, "loss": 1.3679, "step": 117 }, { "epoch": 0.02, "learning_rate": 3.3006993006993014e-05, "loss": 1.4001, "step": 118 }, { "epoch": 0.03, "learning_rate": 3.328671328671329e-05, "loss": 1.3716, "step": 119 }, { "epoch": 0.03, "learning_rate": 3.356643356643357e-05, "loss": 1.4149, "step": 120 }, { "epoch": 0.03, "learning_rate": 3.384615384615385e-05, "loss": 1.3703, "step": 121 }, { "epoch": 0.03, "learning_rate": 3.412587412587413e-05, "loss": 1.4115, "step": 122 }, { "epoch": 0.03, "learning_rate": 3.440559440559441e-05, "loss": 1.4308, "step": 123 }, { "epoch": 0.03, "learning_rate": 3.468531468531469e-05, "loss": 1.4214, "step": 124 }, { "epoch": 0.03, "learning_rate": 3.496503496503497e-05, "loss": 1.3922, "step": 125 }, { "epoch": 0.03, "learning_rate": 3.524475524475525e-05, "loss": 2.2952, "step": 126 }, { "epoch": 0.03, "learning_rate": 3.552447552447553e-05, "loss": 1.4082, "step": 127 }, { "epoch": 0.03, "learning_rate": 3.5804195804195806e-05, "loss": 1.4501, "step": 128 }, { "epoch": 0.03, "learning_rate": 3.608391608391609e-05, "loss": 1.4269, "step": 129 }, { "epoch": 0.03, "learning_rate": 3.6363636363636364e-05, "loss": 1.3986, "step": 130 }, { "epoch": 0.03, "learning_rate": 3.664335664335665e-05, "loss": 1.429, "step": 131 }, { "epoch": 0.03, "learning_rate": 3.692307692307693e-05, "loss": 1.3727, "step": 132 }, { "epoch": 0.03, "learning_rate": 3.7202797202797205e-05, "loss": 1.3966, "step": 133 }, { "epoch": 0.03, "learning_rate": 3.748251748251749e-05, "loss": 1.4479, "step": 134 }, { "epoch": 0.03, "learning_rate": 3.776223776223776e-05, "loss": 1.2996, "step": 135 }, { "epoch": 0.03, "learning_rate": 3.8041958041958046e-05, "loss": 1.3981, "step": 136 }, { "epoch": 0.03, "learning_rate": 3.832167832167832e-05, "loss": 1.4238, "step": 137 }, { "epoch": 0.03, "learning_rate": 3.8601398601398604e-05, "loss": 1.3833, "step": 138 }, { "epoch": 0.03, "learning_rate": 3.888111888111888e-05, "loss": 1.4186, "step": 139 }, { "epoch": 0.03, "learning_rate": 3.916083916083916e-05, "loss": 1.3688, "step": 140 }, { "epoch": 0.03, "learning_rate": 3.9440559440559445e-05, "loss": 1.3687, "step": 141 }, { "epoch": 0.03, "learning_rate": 3.972027972027972e-05, "loss": 1.3736, "step": 142 }, { "epoch": 0.03, "learning_rate": 4e-05, "loss": 1.3654, "step": 143 }, { "epoch": 0.03, "learning_rate": 3.999999535795353e-05, "loss": 1.3127, "step": 144 }, { "epoch": 0.03, "learning_rate": 3.9999981431816256e-05, "loss": 1.3956, "step": 145 }, { "epoch": 0.03, "learning_rate": 3.999995822159466e-05, "loss": 1.3325, "step": 146 }, { "epoch": 0.03, "learning_rate": 3.9999925727299505e-05, "loss": 1.3311, "step": 147 }, { "epoch": 0.03, "learning_rate": 3.999988394894588e-05, "loss": 1.371, "step": 148 }, { "epoch": 0.03, "learning_rate": 3.999983288655318e-05, "loss": 1.3063, "step": 149 }, { "epoch": 0.03, "learning_rate": 3.9999772540145104e-05, "loss": 1.4212, "step": 150 }, { "epoch": 0.03, "learning_rate": 3.999970290974967e-05, "loss": 1.3853, "step": 151 }, { "epoch": 0.03, "learning_rate": 3.999962399539919e-05, "loss": 1.3556, "step": 152 }, { "epoch": 0.03, "learning_rate": 3.9999535797130304e-05, "loss": 1.3849, "step": 153 }, { "epoch": 0.03, "learning_rate": 3.999943831498395e-05, "loss": 1.3933, "step": 154 }, { "epoch": 0.03, "learning_rate": 3.9999331549005394e-05, "loss": 1.3331, "step": 155 }, { "epoch": 0.03, "learning_rate": 3.999921549924418e-05, "loss": 1.3488, "step": 156 }, { "epoch": 0.03, "learning_rate": 3.999909016575419e-05, "loss": 1.4472, "step": 157 }, { "epoch": 0.03, "learning_rate": 3.99989555485936e-05, "loss": 1.4994, "step": 158 }, { "epoch": 0.03, "learning_rate": 3.99988116478249e-05, "loss": 1.4134, "step": 159 }, { "epoch": 0.03, "learning_rate": 3.999865846351489e-05, "loss": 1.3904, "step": 160 }, { "epoch": 0.03, "learning_rate": 3.9998495995734677e-05, "loss": 1.3941, "step": 161 }, { "epoch": 0.03, "learning_rate": 3.999832424455968e-05, "loss": 1.3369, "step": 162 }, { "epoch": 0.03, "learning_rate": 3.999814321006963e-05, "loss": 1.3989, "step": 163 }, { "epoch": 0.03, "learning_rate": 3.999795289234856e-05, "loss": 1.3794, "step": 164 }, { "epoch": 0.03, "learning_rate": 3.9997753291484816e-05, "loss": 1.3702, "step": 165 }, { "epoch": 0.03, "learning_rate": 3.999754440757105e-05, "loss": 1.322, "step": 166 }, { "epoch": 0.04, "learning_rate": 3.999732624070424e-05, "loss": 1.4168, "step": 167 }, { "epoch": 0.04, "learning_rate": 3.999709879098565e-05, "loss": 1.3924, "step": 168 }, { "epoch": 0.04, "learning_rate": 3.999686205852087e-05, "loss": 1.3191, "step": 169 }, { "epoch": 0.04, "learning_rate": 3.999661604341978e-05, "loss": 1.398, "step": 170 }, { "epoch": 0.04, "learning_rate": 3.99963607457966e-05, "loss": 1.3207, "step": 171 }, { "epoch": 0.04, "learning_rate": 3.999609616576982e-05, "loss": 1.3763, "step": 172 }, { "epoch": 0.04, "learning_rate": 3.9995822303462273e-05, "loss": 1.3931, "step": 173 }, { "epoch": 0.04, "learning_rate": 3.9995539159001074e-05, "loss": 1.382, "step": 174 }, { "epoch": 0.04, "learning_rate": 3.999524673251768e-05, "loss": 1.4525, "step": 175 }, { "epoch": 0.04, "learning_rate": 3.999494502414783e-05, "loss": 1.4071, "step": 176 }, { "epoch": 0.04, "learning_rate": 3.999463403403156e-05, "loss": 1.4079, "step": 177 }, { "epoch": 0.04, "learning_rate": 3.999431376231326e-05, "loss": 1.44, "step": 178 }, { "epoch": 0.04, "learning_rate": 3.9993984209141576e-05, "loss": 1.4475, "step": 179 }, { "epoch": 0.04, "learning_rate": 3.999364537466951e-05, "loss": 1.4105, "step": 180 }, { "epoch": 0.04, "learning_rate": 3.999329725905434e-05, "loss": 1.3631, "step": 181 }, { "epoch": 0.04, "learning_rate": 3.999293986245766e-05, "loss": 1.3998, "step": 182 }, { "epoch": 0.04, "learning_rate": 3.9992573185045386e-05, "loss": 1.3647, "step": 183 }, { "epoch": 0.04, "learning_rate": 3.9992197226987725e-05, "loss": 1.4261, "step": 184 }, { "epoch": 0.04, "learning_rate": 3.999181198845919e-05, "loss": 1.3835, "step": 185 }, { "epoch": 0.04, "learning_rate": 3.999141746963862e-05, "loss": 1.3491, "step": 186 }, { "epoch": 0.04, "learning_rate": 3.999101367070916e-05, "loss": 1.3868, "step": 187 }, { "epoch": 0.04, "learning_rate": 3.9990600591858244e-05, "loss": 1.3502, "step": 188 }, { "epoch": 0.04, "learning_rate": 3.999017823327762e-05, "loss": 1.3767, "step": 189 }, { "epoch": 0.04, "learning_rate": 3.9989746595163364e-05, "loss": 1.4154, "step": 190 }, { "epoch": 0.04, "learning_rate": 3.998930567771583e-05, "loss": 1.3643, "step": 191 }, { "epoch": 0.04, "learning_rate": 3.998885548113971e-05, "loss": 1.427, "step": 192 }, { "epoch": 0.04, "learning_rate": 3.998839600564398e-05, "loss": 1.3278, "step": 193 }, { "epoch": 0.04, "learning_rate": 3.998792725144192e-05, "loss": 1.364, "step": 194 }, { "epoch": 0.04, "learning_rate": 3.9987449218751134e-05, "loss": 1.3331, "step": 195 }, { "epoch": 0.04, "learning_rate": 3.998696190779354e-05, "loss": 1.3721, "step": 196 }, { "epoch": 0.04, "learning_rate": 3.9986465318795336e-05, "loss": 1.4242, "step": 197 }, { "epoch": 0.04, "learning_rate": 3.998595945198705e-05, "loss": 1.3941, "step": 198 }, { "epoch": 0.04, "learning_rate": 3.9985444307603497e-05, "loss": 1.398, "step": 199 }, { "epoch": 0.04, "learning_rate": 3.998491988588381e-05, "loss": 1.3835, "step": 200 }, { "epoch": 0.04, "learning_rate": 3.998438618707144e-05, "loss": 1.383, "step": 201 }, { "epoch": 0.04, "learning_rate": 3.9983843211414124e-05, "loss": 1.4021, "step": 202 }, { "epoch": 0.04, "learning_rate": 3.9983290959163914e-05, "loss": 1.3831, "step": 203 }, { "epoch": 0.04, "learning_rate": 3.998272943057717e-05, "loss": 1.3201, "step": 204 }, { "epoch": 0.04, "learning_rate": 3.998215862591455e-05, "loss": 1.3822, "step": 205 }, { "epoch": 0.04, "learning_rate": 3.998157854544104e-05, "loss": 1.3514, "step": 206 }, { "epoch": 0.04, "learning_rate": 3.99809891894259e-05, "loss": 1.3992, "step": 207 }, { "epoch": 0.04, "learning_rate": 3.998039055814272e-05, "loss": 1.3909, "step": 208 }, { "epoch": 0.04, "learning_rate": 3.9979782651869384e-05, "loss": 1.3097, "step": 209 }, { "epoch": 0.04, "learning_rate": 3.997916547088808e-05, "loss": 1.3593, "step": 210 }, { "epoch": 0.04, "learning_rate": 3.997853901548532e-05, "loss": 1.3284, "step": 211 }, { "epoch": 0.04, "learning_rate": 3.9977903285951896e-05, "loss": 1.3946, "step": 212 }, { "epoch": 0.04, "learning_rate": 3.9977258282582916e-05, "loss": 1.3739, "step": 213 }, { "epoch": 0.05, "learning_rate": 3.99766040056778e-05, "loss": 1.371, "step": 214 }, { "epoch": 0.05, "learning_rate": 3.997594045554027e-05, "loss": 1.3733, "step": 215 }, { "epoch": 0.05, "learning_rate": 3.9975267632478336e-05, "loss": 1.3776, "step": 216 }, { "epoch": 0.05, "learning_rate": 3.997458553680434e-05, "loss": 1.3519, "step": 217 }, { "epoch": 0.05, "learning_rate": 3.99738941688349e-05, "loss": 1.3254, "step": 218 }, { "epoch": 0.05, "learning_rate": 3.997319352889096e-05, "loss": 1.3786, "step": 219 }, { "epoch": 0.05, "learning_rate": 3.997248361729777e-05, "loss": 1.3308, "step": 220 }, { "epoch": 0.05, "learning_rate": 3.997176443438485e-05, "loss": 1.3666, "step": 221 }, { "epoch": 0.05, "learning_rate": 3.997103598048607e-05, "loss": 1.3565, "step": 222 }, { "epoch": 0.05, "learning_rate": 3.9970298255939564e-05, "loss": 1.342, "step": 223 }, { "epoch": 0.05, "learning_rate": 3.9969551261087806e-05, "loss": 1.4034, "step": 224 }, { "epoch": 0.05, "learning_rate": 3.996879499627754e-05, "loss": 1.3279, "step": 225 }, { "epoch": 0.05, "learning_rate": 3.996802946185984e-05, "loss": 1.3397, "step": 226 }, { "epoch": 0.05, "learning_rate": 3.9967254658190055e-05, "loss": 1.4121, "step": 227 }, { "epoch": 0.05, "learning_rate": 3.996647058562786e-05, "loss": 1.3868, "step": 228 }, { "epoch": 0.05, "learning_rate": 3.9965677244537226e-05, "loss": 1.2964, "step": 229 }, { "epoch": 0.05, "learning_rate": 3.9964874635286436e-05, "loss": 1.3671, "step": 230 }, { "epoch": 0.05, "learning_rate": 3.996406275824804e-05, "loss": 1.3471, "step": 231 }, { "epoch": 0.05, "learning_rate": 3.996324161379894e-05, "loss": 1.309, "step": 232 }, { "epoch": 0.05, "learning_rate": 3.9962411202320296e-05, "loss": 1.382, "step": 233 }, { "epoch": 0.05, "learning_rate": 3.99615715241976e-05, "loss": 1.3367, "step": 234 }, { "epoch": 0.05, "learning_rate": 3.996072257982064e-05, "loss": 1.3915, "step": 235 }, { "epoch": 0.05, "learning_rate": 3.9959864369583485e-05, "loss": 1.423, "step": 236 }, { "epoch": 0.05, "learning_rate": 3.9958996893884525e-05, "loss": 1.3617, "step": 237 }, { "epoch": 0.05, "learning_rate": 3.9958120153126454e-05, "loss": 1.3036, "step": 238 }, { "epoch": 0.05, "learning_rate": 3.995723414771625e-05, "loss": 1.3381, "step": 239 }, { "epoch": 0.05, "learning_rate": 3.9956338878065205e-05, "loss": 1.3624, "step": 240 }, { "epoch": 0.05, "learning_rate": 3.99554343445889e-05, "loss": 1.4119, "step": 241 }, { "epoch": 0.05, "learning_rate": 3.995452054770724e-05, "loss": 1.391, "step": 242 }, { "epoch": 0.05, "learning_rate": 3.99535974878444e-05, "loss": 1.3295, "step": 243 }, { "epoch": 0.05, "learning_rate": 3.995266516542887e-05, "loss": 1.3407, "step": 244 }, { "epoch": 0.05, "learning_rate": 3.995172358089344e-05, "loss": 1.3814, "step": 245 }, { "epoch": 0.05, "learning_rate": 3.995077273467521e-05, "loss": 1.3753, "step": 246 }, { "epoch": 0.05, "learning_rate": 3.994981262721555e-05, "loss": 1.337, "step": 247 }, { "epoch": 0.05, "learning_rate": 3.9948843258960154e-05, "loss": 1.3938, "step": 248 }, { "epoch": 0.05, "learning_rate": 3.9947864630359005e-05, "loss": 1.3571, "step": 249 }, { "epoch": 0.05, "learning_rate": 3.994687674186638e-05, "loss": 1.3662, "step": 250 }, { "epoch": 0.05, "learning_rate": 3.9945879593940874e-05, "loss": 1.3636, "step": 251 }, { "epoch": 0.05, "learning_rate": 3.994487318704536e-05, "loss": 1.3692, "step": 252 }, { "epoch": 0.05, "learning_rate": 3.994385752164703e-05, "loss": 1.413, "step": 253 }, { "epoch": 0.05, "learning_rate": 3.9942832598217345e-05, "loss": 1.4184, "step": 254 }, { "epoch": 0.05, "learning_rate": 3.9941798417232084e-05, "loss": 1.3372, "step": 255 }, { "epoch": 0.05, "learning_rate": 3.9940754979171317e-05, "loss": 1.3661, "step": 256 }, { "epoch": 0.05, "learning_rate": 3.9939702284519416e-05, "loss": 1.393, "step": 257 }, { "epoch": 0.05, "learning_rate": 3.9938640333765046e-05, "loss": 1.4067, "step": 258 }, { "epoch": 0.05, "learning_rate": 3.993756912740117e-05, "loss": 1.358, "step": 259 }, { "epoch": 0.05, "learning_rate": 3.9936488665925045e-05, "loss": 1.3745, "step": 260 }, { "epoch": 0.05, "learning_rate": 3.993539894983823e-05, "loss": 1.3749, "step": 261 }, { "epoch": 0.06, "learning_rate": 3.993429997964657e-05, "loss": 1.3752, "step": 262 }, { "epoch": 0.06, "learning_rate": 3.993319175586021e-05, "loss": 1.3392, "step": 263 }, { "epoch": 0.06, "learning_rate": 3.9932074278993604e-05, "loss": 1.4217, "step": 264 }, { "epoch": 0.06, "learning_rate": 3.993094754956549e-05, "loss": 1.4214, "step": 265 }, { "epoch": 0.06, "learning_rate": 3.992981156809889e-05, "loss": 1.3625, "step": 266 }, { "epoch": 0.06, "learning_rate": 3.9928666335121135e-05, "loss": 1.4306, "step": 267 }, { "epoch": 0.06, "learning_rate": 3.992751185116385e-05, "loss": 1.3936, "step": 268 }, { "epoch": 0.06, "learning_rate": 3.992634811676296e-05, "loss": 1.3801, "step": 269 }, { "epoch": 0.06, "learning_rate": 3.992517513245865e-05, "loss": 1.3638, "step": 270 }, { "epoch": 0.06, "learning_rate": 3.992399289879546e-05, "loss": 1.4009, "step": 271 }, { "epoch": 0.06, "learning_rate": 3.992280141632216e-05, "loss": 1.3206, "step": 272 }, { "epoch": 0.06, "learning_rate": 3.9921600685591856e-05, "loss": 1.3925, "step": 273 }, { "epoch": 0.06, "learning_rate": 3.9920390707161927e-05, "loss": 1.4102, "step": 274 }, { "epoch": 0.06, "learning_rate": 3.9919171481594056e-05, "loss": 1.348, "step": 275 }, { "epoch": 0.06, "learning_rate": 3.9917943009454206e-05, "loss": 1.3055, "step": 276 }, { "epoch": 0.06, "learning_rate": 3.9916705291312646e-05, "loss": 1.3889, "step": 277 }, { "epoch": 0.06, "learning_rate": 3.991545832774393e-05, "loss": 1.3769, "step": 278 }, { "epoch": 0.06, "learning_rate": 3.9914202119326895e-05, "loss": 1.3341, "step": 279 }, { "epoch": 0.06, "learning_rate": 3.991293666664469e-05, "loss": 1.2588, "step": 280 }, { "epoch": 0.06, "learning_rate": 3.991166197028474e-05, "loss": 1.3687, "step": 281 }, { "epoch": 0.06, "learning_rate": 3.9910378030838765e-05, "loss": 1.3817, "step": 282 }, { "epoch": 0.06, "learning_rate": 3.990908484890277e-05, "loss": 1.3244, "step": 283 }, { "epoch": 0.06, "learning_rate": 3.990778242507707e-05, "loss": 1.3482, "step": 284 }, { "epoch": 0.06, "learning_rate": 3.990647075996624e-05, "loss": 1.3568, "step": 285 }, { "epoch": 0.06, "learning_rate": 3.9905149854179174e-05, "loss": 1.3464, "step": 286 }, { "epoch": 0.06, "learning_rate": 3.990381970832903e-05, "loss": 1.3846, "step": 287 }, { "epoch": 0.06, "learning_rate": 3.9902480323033285e-05, "loss": 1.3303, "step": 288 }, { "epoch": 0.06, "learning_rate": 3.990113169891367e-05, "loss": 1.3757, "step": 289 }, { "epoch": 0.06, "learning_rate": 3.989977383659624e-05, "loss": 1.3703, "step": 290 }, { "epoch": 0.06, "learning_rate": 3.989840673671131e-05, "loss": 1.3292, "step": 291 }, { "epoch": 0.06, "learning_rate": 3.989703039989349e-05, "loss": 1.3497, "step": 292 }, { "epoch": 0.06, "learning_rate": 3.989564482678168e-05, "loss": 1.3751, "step": 293 }, { "epoch": 0.06, "learning_rate": 3.989425001801909e-05, "loss": 1.384, "step": 294 }, { "epoch": 0.06, "learning_rate": 3.9892845974253184e-05, "loss": 1.3478, "step": 295 }, { "epoch": 0.06, "learning_rate": 3.989143269613572e-05, "loss": 1.3822, "step": 296 }, { "epoch": 0.06, "learning_rate": 3.989001018432276e-05, "loss": 1.3131, "step": 297 }, { "epoch": 0.06, "learning_rate": 3.988857843947463e-05, "loss": 1.3686, "step": 298 }, { "epoch": 0.06, "learning_rate": 3.988713746225596e-05, "loss": 1.4093, "step": 299 }, { "epoch": 0.06, "learning_rate": 3.988568725333565e-05, "loss": 1.4109, "step": 300 }, { "epoch": 0.06, "learning_rate": 3.98842278133869e-05, "loss": 1.3869, "step": 301 }, { "epoch": 0.06, "learning_rate": 3.9882759143087194e-05, "loss": 1.3923, "step": 302 }, { "epoch": 0.06, "learning_rate": 3.9881281243118285e-05, "loss": 1.3905, "step": 303 }, { "epoch": 0.06, "learning_rate": 3.987979411416623e-05, "loss": 1.3664, "step": 304 }, { "epoch": 0.06, "learning_rate": 3.987829775692135e-05, "loss": 1.3708, "step": 305 }, { "epoch": 0.06, "learning_rate": 3.987679217207827e-05, "loss": 1.2989, "step": 306 }, { "epoch": 0.06, "learning_rate": 3.987527736033589e-05, "loss": 1.3313, "step": 307 }, { "epoch": 0.06, "learning_rate": 3.987375332239739e-05, "loss": 1.342, "step": 308 }, { "epoch": 0.06, "learning_rate": 3.9872220058970226e-05, "loss": 1.3617, "step": 309 }, { "epoch": 0.07, "learning_rate": 3.9870677570766167e-05, "loss": 1.3414, "step": 310 }, { "epoch": 0.07, "learning_rate": 3.986912585850123e-05, "loss": 1.346, "step": 311 }, { "epoch": 0.07, "learning_rate": 3.9867564922895724e-05, "loss": 1.3326, "step": 312 }, { "epoch": 0.07, "learning_rate": 3.986599476467425e-05, "loss": 1.3472, "step": 313 }, { "epoch": 0.07, "learning_rate": 3.9864415384565675e-05, "loss": 1.3789, "step": 314 }, { "epoch": 0.07, "learning_rate": 3.986282678330316e-05, "loss": 1.3781, "step": 315 }, { "epoch": 0.07, "learning_rate": 3.9861228961624146e-05, "loss": 1.2815, "step": 316 }, { "epoch": 0.07, "learning_rate": 3.985962192027034e-05, "loss": 1.3893, "step": 317 }, { "epoch": 0.07, "learning_rate": 3.985800565998775e-05, "loss": 1.3448, "step": 318 }, { "epoch": 0.07, "learning_rate": 3.9856380181526634e-05, "loss": 1.3941, "step": 319 }, { "epoch": 0.07, "learning_rate": 3.9854745485641556e-05, "loss": 1.388, "step": 320 }, { "epoch": 0.07, "learning_rate": 3.985310157309135e-05, "loss": 1.3739, "step": 321 }, { "epoch": 0.07, "learning_rate": 3.985144844463913e-05, "loss": 1.3588, "step": 322 }, { "epoch": 0.07, "learning_rate": 3.9849786101052285e-05, "loss": 1.3605, "step": 323 }, { "epoch": 0.07, "learning_rate": 3.984811454310248e-05, "loss": 1.4181, "step": 324 }, { "epoch": 0.07, "learning_rate": 3.9846433771565655e-05, "loss": 1.3938, "step": 325 }, { "epoch": 0.07, "learning_rate": 3.9844743787222046e-05, "loss": 1.3191, "step": 326 }, { "epoch": 0.07, "learning_rate": 3.984304459085614e-05, "loss": 1.3999, "step": 327 }, { "epoch": 0.07, "learning_rate": 3.984133618325671e-05, "loss": 1.3927, "step": 328 }, { "epoch": 0.07, "learning_rate": 3.983961856521682e-05, "loss": 1.4153, "step": 329 }, { "epoch": 0.07, "learning_rate": 3.983789173753378e-05, "loss": 1.3608, "step": 330 }, { "epoch": 0.07, "learning_rate": 3.983615570100921e-05, "loss": 1.3745, "step": 331 }, { "epoch": 0.07, "learning_rate": 3.9834410456448966e-05, "loss": 1.3283, "step": 332 }, { "epoch": 0.07, "learning_rate": 3.983265600466321e-05, "loss": 1.3819, "step": 333 }, { "epoch": 0.07, "learning_rate": 3.983089234646637e-05, "loss": 1.3575, "step": 334 }, { "epoch": 0.07, "learning_rate": 3.9829119482677144e-05, "loss": 1.3852, "step": 335 }, { "epoch": 0.07, "learning_rate": 3.9827337414118486e-05, "loss": 1.3329, "step": 336 }, { "epoch": 0.07, "learning_rate": 3.982554614161766e-05, "loss": 1.3351, "step": 337 }, { "epoch": 0.07, "learning_rate": 3.9823745666006176e-05, "loss": 1.3919, "step": 338 }, { "epoch": 0.07, "learning_rate": 3.982193598811983e-05, "loss": 1.3631, "step": 339 }, { "epoch": 0.07, "learning_rate": 3.9820117108798666e-05, "loss": 1.3529, "step": 340 }, { "epoch": 0.07, "learning_rate": 3.981828902888704e-05, "loss": 1.3448, "step": 341 }, { "epoch": 0.07, "learning_rate": 3.981645174923353e-05, "loss": 1.3787, "step": 342 }, { "epoch": 0.07, "learning_rate": 3.9814605270691025e-05, "loss": 1.3678, "step": 343 }, { "epoch": 0.07, "learning_rate": 3.981274959411667e-05, "loss": 1.3527, "step": 344 }, { "epoch": 0.07, "learning_rate": 3.9810884720371874e-05, "loss": 1.3531, "step": 345 }, { "epoch": 0.07, "learning_rate": 3.980901065032232e-05, "loss": 1.3508, "step": 346 }, { "epoch": 0.07, "learning_rate": 3.9807127384837955e-05, "loss": 1.3343, "step": 347 }, { "epoch": 0.07, "learning_rate": 3.980523492479301e-05, "loss": 1.374, "step": 348 }, { "epoch": 0.07, "learning_rate": 3.980333327106596e-05, "loss": 1.326, "step": 349 }, { "epoch": 0.07, "learning_rate": 3.980142242453958e-05, "loss": 1.326, "step": 350 }, { "epoch": 0.07, "learning_rate": 3.979950238610088e-05, "loss": 1.2934, "step": 351 }, { "epoch": 0.07, "learning_rate": 3.9797573156641165e-05, "loss": 1.3524, "step": 352 }, { "epoch": 0.07, "learning_rate": 3.9795634737055974e-05, "loss": 1.3736, "step": 353 }, { "epoch": 0.07, "learning_rate": 3.979368712824514e-05, "loss": 1.3553, "step": 354 }, { "epoch": 0.07, "learning_rate": 3.979173033111275e-05, "loss": 1.337, "step": 355 }, { "epoch": 0.07, "learning_rate": 3.9789764346567154e-05, "loss": 1.3571, "step": 356 }, { "epoch": 0.08, "learning_rate": 3.978778917552099e-05, "loss": 1.3803, "step": 357 }, { "epoch": 0.08, "learning_rate": 3.9785804818891117e-05, "loss": 1.3336, "step": 358 }, { "epoch": 0.08, "learning_rate": 3.978381127759869e-05, "loss": 1.3586, "step": 359 }, { "epoch": 0.08, "learning_rate": 3.9781808552569134e-05, "loss": 1.3318, "step": 360 }, { "epoch": 0.08, "learning_rate": 3.97797966447321e-05, "loss": 1.4023, "step": 361 }, { "epoch": 0.08, "learning_rate": 3.977777555502155e-05, "loss": 1.3615, "step": 362 }, { "epoch": 0.08, "learning_rate": 3.977574528437567e-05, "loss": 1.3628, "step": 363 }, { "epoch": 0.08, "learning_rate": 3.977370583373692e-05, "loss": 1.3542, "step": 364 }, { "epoch": 0.08, "learning_rate": 3.9771657204052026e-05, "loss": 1.3581, "step": 365 }, { "epoch": 0.08, "learning_rate": 3.976959939627196e-05, "loss": 1.3722, "step": 366 }, { "epoch": 0.08, "learning_rate": 3.9767532411351985e-05, "loss": 1.3393, "step": 367 }, { "epoch": 0.08, "learning_rate": 3.97654562502516e-05, "loss": 1.4009, "step": 368 }, { "epoch": 0.08, "learning_rate": 3.9763370913934554e-05, "loss": 1.3111, "step": 369 }, { "epoch": 0.08, "learning_rate": 3.976127640336889e-05, "loss": 1.3528, "step": 370 }, { "epoch": 0.08, "learning_rate": 3.9759172719526876e-05, "loss": 1.3229, "step": 371 }, { "epoch": 0.08, "learning_rate": 3.975705986338505e-05, "loss": 1.3766, "step": 372 }, { "epoch": 0.08, "learning_rate": 3.9754937835924214e-05, "loss": 1.3366, "step": 373 }, { "epoch": 0.08, "learning_rate": 3.9752806638129435e-05, "loss": 1.2942, "step": 374 }, { "epoch": 0.08, "learning_rate": 3.975066627099e-05, "loss": 1.3489, "step": 375 }, { "epoch": 0.08, "learning_rate": 3.974851673549951e-05, "loss": 1.318, "step": 376 }, { "epoch": 0.08, "learning_rate": 3.9746358032655764e-05, "loss": 1.3541, "step": 377 }, { "epoch": 0.08, "learning_rate": 3.974419016346084e-05, "loss": 1.3754, "step": 378 }, { "epoch": 0.08, "learning_rate": 3.974201312892109e-05, "loss": 1.379, "step": 379 }, { "epoch": 0.08, "learning_rate": 3.9739826930047095e-05, "loss": 1.3513, "step": 380 }, { "epoch": 0.08, "learning_rate": 3.9737631567853695e-05, "loss": 1.3605, "step": 381 }, { "epoch": 0.08, "learning_rate": 3.9735427043359996e-05, "loss": 1.3699, "step": 382 }, { "epoch": 0.08, "learning_rate": 3.973321335758934e-05, "loss": 1.3238, "step": 383 }, { "epoch": 0.08, "learning_rate": 3.973099051156934e-05, "loss": 1.3938, "step": 384 }, { "epoch": 0.08, "learning_rate": 3.972875850633184e-05, "loss": 1.3824, "step": 385 }, { "epoch": 0.08, "learning_rate": 3.9726517342912954e-05, "loss": 1.3792, "step": 386 }, { "epoch": 0.08, "learning_rate": 3.972426702235304e-05, "loss": 1.3718, "step": 387 }, { "epoch": 0.08, "learning_rate": 3.972200754569671e-05, "loss": 1.3514, "step": 388 }, { "epoch": 0.08, "learning_rate": 3.9719738913992815e-05, "loss": 1.3538, "step": 389 }, { "epoch": 0.08, "learning_rate": 3.971746112829447e-05, "loss": 1.3107, "step": 390 }, { "epoch": 0.08, "learning_rate": 3.9715174189659036e-05, "loss": 1.329, "step": 391 }, { "epoch": 0.08, "learning_rate": 3.971287809914811e-05, "loss": 1.3611, "step": 392 }, { "epoch": 0.08, "learning_rate": 3.971057285782757e-05, "loss": 1.3872, "step": 393 }, { "epoch": 0.08, "learning_rate": 3.970825846676749e-05, "loss": 1.3523, "step": 394 }, { "epoch": 0.08, "learning_rate": 3.970593492704225e-05, "loss": 1.3117, "step": 395 }, { "epoch": 0.08, "learning_rate": 3.9703602239730425e-05, "loss": 1.3709, "step": 396 }, { "epoch": 0.08, "learning_rate": 3.9701260405914874e-05, "loss": 1.3922, "step": 397 }, { "epoch": 0.08, "learning_rate": 3.9698909426682674e-05, "loss": 1.3415, "step": 398 }, { "epoch": 0.08, "learning_rate": 3.9696549303125176e-05, "loss": 1.3688, "step": 399 }, { "epoch": 0.08, "learning_rate": 3.969418003633795e-05, "loss": 1.3396, "step": 400 }, { "epoch": 0.08, "learning_rate": 3.969180162742082e-05, "loss": 1.3672, "step": 401 }, { "epoch": 0.08, "learning_rate": 3.9689414077477865e-05, "loss": 1.3191, "step": 402 }, { "epoch": 0.08, "learning_rate": 3.968701738761739e-05, "loss": 1.2855, "step": 403 }, { "epoch": 0.08, "learning_rate": 3.968461155895194e-05, "loss": 1.3558, "step": 404 }, { "epoch": 0.09, "learning_rate": 3.9682196592598324e-05, "loss": 1.3334, "step": 405 }, { "epoch": 0.09, "learning_rate": 3.967977248967758e-05, "loss": 1.3646, "step": 406 }, { "epoch": 0.09, "learning_rate": 3.967733925131498e-05, "loss": 1.3379, "step": 407 }, { "epoch": 0.09, "learning_rate": 3.9674896878640054e-05, "loss": 1.3785, "step": 408 }, { "epoch": 0.09, "learning_rate": 3.9672445372786565e-05, "loss": 1.287, "step": 409 }, { "epoch": 0.09, "learning_rate": 3.96699847348925e-05, "loss": 1.3839, "step": 410 }, { "epoch": 0.09, "learning_rate": 3.966751496610011e-05, "loss": 1.3697, "step": 411 }, { "epoch": 0.09, "learning_rate": 3.966503606755586e-05, "loss": 1.3358, "step": 412 }, { "epoch": 0.09, "learning_rate": 3.9662548040410485e-05, "loss": 1.3074, "step": 413 }, { "epoch": 0.09, "learning_rate": 3.9660050885818925e-05, "loss": 1.3352, "step": 414 }, { "epoch": 0.09, "learning_rate": 3.965754460494037e-05, "loss": 1.2467, "step": 415 }, { "epoch": 0.09, "learning_rate": 3.9655029198938256e-05, "loss": 1.3878, "step": 416 }, { "epoch": 0.09, "learning_rate": 3.965250466898024e-05, "loss": 1.3401, "step": 417 }, { "epoch": 0.09, "learning_rate": 3.964997101623823e-05, "loss": 1.3288, "step": 418 }, { "epoch": 0.09, "learning_rate": 3.964742824188834e-05, "loss": 1.2528, "step": 419 }, { "epoch": 0.09, "learning_rate": 3.9644876347110956e-05, "loss": 1.3386, "step": 420 }, { "epoch": 0.09, "learning_rate": 3.964231533309067e-05, "loss": 1.3433, "step": 421 }, { "epoch": 0.09, "learning_rate": 3.963974520101632e-05, "loss": 1.3288, "step": 422 }, { "epoch": 0.09, "learning_rate": 3.963716595208098e-05, "loss": 1.3128, "step": 423 }, { "epoch": 0.09, "learning_rate": 3.963457758748193e-05, "loss": 1.3677, "step": 424 }, { "epoch": 0.09, "learning_rate": 3.963198010842073e-05, "loss": 1.3343, "step": 425 }, { "epoch": 0.09, "learning_rate": 3.9629373516103114e-05, "loss": 1.325, "step": 426 }, { "epoch": 0.09, "learning_rate": 3.9626757811739084e-05, "loss": 1.3301, "step": 427 }, { "epoch": 0.09, "learning_rate": 3.962413299654286e-05, "loss": 1.3561, "step": 428 }, { "epoch": 0.09, "learning_rate": 3.962149907173291e-05, "loss": 1.3344, "step": 429 }, { "epoch": 0.09, "learning_rate": 3.961885603853189e-05, "loss": 1.3034, "step": 430 }, { "epoch": 0.09, "learning_rate": 3.9616203898166724e-05, "loss": 1.3253, "step": 431 }, { "epoch": 0.09, "learning_rate": 3.961354265186854e-05, "loss": 1.3421, "step": 432 }, { "epoch": 0.09, "learning_rate": 3.9610872300872704e-05, "loss": 1.3792, "step": 433 }, { "epoch": 0.09, "learning_rate": 3.96081928464188e-05, "loss": 1.4123, "step": 434 }, { "epoch": 0.09, "learning_rate": 3.960550428975066e-05, "loss": 1.3507, "step": 435 }, { "epoch": 0.09, "learning_rate": 3.9602806632116304e-05, "loss": 1.362, "step": 436 }, { "epoch": 0.09, "learning_rate": 3.960009987476801e-05, "loss": 1.3853, "step": 437 }, { "epoch": 0.09, "learning_rate": 3.959738401896227e-05, "loss": 1.3272, "step": 438 }, { "epoch": 0.09, "learning_rate": 3.9594659065959774e-05, "loss": 1.3299, "step": 439 }, { "epoch": 0.09, "learning_rate": 3.959192501702548e-05, "loss": 1.3923, "step": 440 }, { "epoch": 0.09, "learning_rate": 3.958918187342855e-05, "loss": 1.3553, "step": 441 }, { "epoch": 0.09, "learning_rate": 3.9586429636442346e-05, "loss": 1.3334, "step": 442 }, { "epoch": 0.09, "learning_rate": 3.958366830734448e-05, "loss": 1.3455, "step": 443 }, { "epoch": 0.09, "learning_rate": 3.958089788741677e-05, "loss": 1.3692, "step": 444 }, { "epoch": 0.09, "learning_rate": 3.957811837794526e-05, "loss": 1.3555, "step": 445 }, { "epoch": 0.09, "learning_rate": 3.9575329780220215e-05, "loss": 1.2919, "step": 446 }, { "epoch": 0.09, "learning_rate": 3.957253209553611e-05, "loss": 1.3158, "step": 447 }, { "epoch": 0.09, "learning_rate": 3.956972532519164e-05, "loss": 1.2939, "step": 448 }, { "epoch": 0.09, "learning_rate": 3.956690947048972e-05, "loss": 1.392, "step": 449 }, { "epoch": 0.09, "learning_rate": 3.9564084532737495e-05, "loss": 1.3587, "step": 450 }, { "epoch": 0.09, "learning_rate": 3.9561250513246306e-05, "loss": 1.3468, "step": 451 }, { "epoch": 0.1, "learning_rate": 3.955840741333171e-05, "loss": 1.3749, "step": 452 }, { "epoch": 0.1, "learning_rate": 3.9555555234313506e-05, "loss": 1.3428, "step": 453 }, { "epoch": 0.1, "learning_rate": 3.9552693977515675e-05, "loss": 1.359, "step": 454 }, { "epoch": 0.1, "learning_rate": 3.9549823644266434e-05, "loss": 1.4116, "step": 455 }, { "epoch": 0.1, "learning_rate": 3.9546944235898194e-05, "loss": 1.2855, "step": 456 }, { "epoch": 0.1, "learning_rate": 3.954405575374759e-05, "loss": 1.3296, "step": 457 }, { "epoch": 0.1, "learning_rate": 3.954115819915549e-05, "loss": 1.3525, "step": 458 }, { "epoch": 0.1, "learning_rate": 3.9538251573466926e-05, "loss": 1.3051, "step": 459 }, { "epoch": 0.1, "learning_rate": 3.9535335878031185e-05, "loss": 1.3067, "step": 460 }, { "epoch": 0.1, "learning_rate": 3.953241111420174e-05, "loss": 1.3688, "step": 461 }, { "epoch": 0.1, "learning_rate": 3.9529477283336274e-05, "loss": 1.3524, "step": 462 }, { "epoch": 0.1, "learning_rate": 3.9526534386796696e-05, "loss": 1.3613, "step": 463 }, { "epoch": 0.1, "learning_rate": 3.95235824259491e-05, "loss": 1.3574, "step": 464 }, { "epoch": 0.1, "learning_rate": 3.952062140216381e-05, "loss": 1.3534, "step": 465 }, { "epoch": 0.1, "learning_rate": 3.951765131681535e-05, "loss": 1.3318, "step": 466 }, { "epoch": 0.1, "learning_rate": 3.9514672171282435e-05, "loss": 1.3822, "step": 467 }, { "epoch": 0.1, "learning_rate": 3.951168396694801e-05, "loss": 1.308, "step": 468 }, { "epoch": 0.1, "learning_rate": 3.9508686705199196e-05, "loss": 1.3783, "step": 469 }, { "epoch": 0.1, "learning_rate": 3.950568038742736e-05, "loss": 1.3454, "step": 470 }, { "epoch": 0.1, "learning_rate": 3.950266501502803e-05, "loss": 1.3656, "step": 471 }, { "epoch": 0.1, "learning_rate": 3.9499640589400964e-05, "loss": 1.3865, "step": 472 }, { "epoch": 0.1, "learning_rate": 3.949660711195011e-05, "loss": 1.3157, "step": 473 }, { "epoch": 0.1, "learning_rate": 3.949356458408363e-05, "loss": 1.3184, "step": 474 }, { "epoch": 0.1, "learning_rate": 3.9490513007213874e-05, "loss": 1.373, "step": 475 }, { "epoch": 0.1, "learning_rate": 3.94874523827574e-05, "loss": 1.2863, "step": 476 }, { "epoch": 0.1, "learning_rate": 3.9484382712134956e-05, "loss": 1.3428, "step": 477 }, { "epoch": 0.1, "learning_rate": 3.9481303996771505e-05, "loss": 1.3618, "step": 478 }, { "epoch": 0.1, "learning_rate": 3.9478216238096206e-05, "loss": 1.3291, "step": 479 }, { "epoch": 0.1, "learning_rate": 3.94751194375424e-05, "loss": 1.293, "step": 480 }, { "epoch": 0.1, "learning_rate": 3.9472013596547646e-05, "loss": 1.3879, "step": 481 }, { "epoch": 0.1, "learning_rate": 3.946889871655368e-05, "loss": 1.3692, "step": 482 }, { "epoch": 0.1, "learning_rate": 3.946577479900645e-05, "loss": 1.3788, "step": 483 }, { "epoch": 0.1, "learning_rate": 3.9462641845356096e-05, "loss": 1.335, "step": 484 }, { "epoch": 0.1, "learning_rate": 3.945949985705694e-05, "loss": 1.3058, "step": 485 }, { "epoch": 0.1, "learning_rate": 3.945634883556752e-05, "loss": 1.3531, "step": 486 }, { "epoch": 0.1, "learning_rate": 3.945318878235054e-05, "loss": 1.2844, "step": 487 }, { "epoch": 0.1, "learning_rate": 3.945001969887293e-05, "loss": 1.3488, "step": 488 }, { "epoch": 0.1, "learning_rate": 3.944684158660577e-05, "loss": 1.3626, "step": 489 }, { "epoch": 0.1, "learning_rate": 3.944365444702437e-05, "loss": 1.3428, "step": 490 }, { "epoch": 0.1, "learning_rate": 3.944045828160822e-05, "loss": 1.3372, "step": 491 }, { "epoch": 0.1, "learning_rate": 3.943725309184098e-05, "loss": 1.3614, "step": 492 }, { "epoch": 0.1, "learning_rate": 3.943403887921052e-05, "loss": 1.3507, "step": 493 }, { "epoch": 0.1, "learning_rate": 3.94308156452089e-05, "loss": 1.3577, "step": 494 }, { "epoch": 0.1, "learning_rate": 3.9427583391332354e-05, "loss": 1.3446, "step": 495 }, { "epoch": 0.1, "learning_rate": 3.94243421190813e-05, "loss": 1.3446, "step": 496 }, { "epoch": 0.1, "learning_rate": 3.9421091829960364e-05, "loss": 1.3453, "step": 497 }, { "epoch": 0.1, "learning_rate": 3.9417832525478344e-05, "loss": 1.3483, "step": 498 }, { "epoch": 0.1, "learning_rate": 3.941456420714822e-05, "loss": 1.3611, "step": 499 }, { "epoch": 0.11, "learning_rate": 3.941128687648717e-05, "loss": 1.332, "step": 500 }, { "epoch": 0.11, "learning_rate": 3.940800053501653e-05, "loss": 1.3141, "step": 501 }, { "epoch": 0.11, "learning_rate": 3.9404705184261846e-05, "loss": 1.3314, "step": 502 }, { "epoch": 0.11, "learning_rate": 3.9401400825752835e-05, "loss": 1.3375, "step": 503 }, { "epoch": 0.11, "learning_rate": 3.939808746102339e-05, "loss": 1.3588, "step": 504 }, { "epoch": 0.11, "learning_rate": 3.9394765091611596e-05, "loss": 1.3301, "step": 505 }, { "epoch": 0.11, "learning_rate": 3.939143371905971e-05, "loss": 1.323, "step": 506 }, { "epoch": 0.11, "learning_rate": 3.938809334491417e-05, "loss": 1.3438, "step": 507 }, { "epoch": 0.11, "learning_rate": 3.9384743970725596e-05, "loss": 1.3791, "step": 508 }, { "epoch": 0.11, "learning_rate": 3.938138559804878e-05, "loss": 1.3477, "step": 509 }, { "epoch": 0.11, "learning_rate": 3.9378018228442696e-05, "loss": 1.3494, "step": 510 }, { "epoch": 0.11, "learning_rate": 3.937464186347049e-05, "loss": 1.2981, "step": 511 }, { "epoch": 0.11, "learning_rate": 3.9371256504699486e-05, "loss": 1.3292, "step": 512 }, { "epoch": 0.11, "learning_rate": 3.936786215370119e-05, "loss": 1.3768, "step": 513 }, { "epoch": 0.11, "learning_rate": 3.936445881205127e-05, "loss": 1.3215, "step": 514 }, { "epoch": 0.11, "learning_rate": 3.936104648132957e-05, "loss": 1.3193, "step": 515 }, { "epoch": 0.11, "learning_rate": 3.935762516312012e-05, "loss": 1.3531, "step": 516 }, { "epoch": 0.11, "learning_rate": 3.9354194859011105e-05, "loss": 1.3579, "step": 517 }, { "epoch": 0.11, "learning_rate": 3.935075557059488e-05, "loss": 1.3258, "step": 518 }, { "epoch": 0.11, "learning_rate": 3.934730729946799e-05, "loss": 1.3648, "step": 519 }, { "epoch": 0.11, "learning_rate": 3.9343850047231144e-05, "loss": 1.2782, "step": 520 }, { "epoch": 0.11, "learning_rate": 3.9340383815489204e-05, "loss": 1.3193, "step": 521 }, { "epoch": 0.11, "learning_rate": 3.933690860585121e-05, "loss": 1.4034, "step": 522 }, { "epoch": 0.11, "learning_rate": 3.933342441993037e-05, "loss": 1.3484, "step": 523 }, { "epoch": 0.11, "learning_rate": 3.932993125934407e-05, "loss": 1.3101, "step": 524 }, { "epoch": 0.11, "learning_rate": 3.932642912571385e-05, "loss": 1.3515, "step": 525 }, { "epoch": 0.11, "learning_rate": 3.932291802066539e-05, "loss": 1.3277, "step": 526 }, { "epoch": 0.11, "learning_rate": 3.93193979458286e-05, "loss": 1.3594, "step": 527 }, { "epoch": 0.11, "learning_rate": 3.93158689028375e-05, "loss": 1.329, "step": 528 }, { "epoch": 0.11, "learning_rate": 3.931233089333027e-05, "loss": 1.3146, "step": 529 }, { "epoch": 0.11, "learning_rate": 3.9308783918949296e-05, "loss": 1.3698, "step": 530 }, { "epoch": 0.11, "learning_rate": 3.9305227981341085e-05, "loss": 1.349, "step": 531 }, { "epoch": 0.11, "learning_rate": 3.930166308215633e-05, "loss": 1.3225, "step": 532 }, { "epoch": 0.11, "learning_rate": 3.929808922304987e-05, "loss": 1.3138, "step": 533 }, { "epoch": 0.11, "learning_rate": 3.92945064056807e-05, "loss": 1.3343, "step": 534 }, { "epoch": 0.11, "learning_rate": 3.929091463171199e-05, "loss": 1.3186, "step": 535 }, { "epoch": 0.11, "learning_rate": 3.928731390281105e-05, "loss": 1.3135, "step": 536 }, { "epoch": 0.11, "learning_rate": 3.928370422064936e-05, "loss": 1.3425, "step": 537 }, { "epoch": 0.11, "learning_rate": 3.928008558690255e-05, "loss": 1.3065, "step": 538 }, { "epoch": 0.11, "learning_rate": 3.927645800325041e-05, "loss": 1.3098, "step": 539 }, { "epoch": 0.11, "learning_rate": 3.927282147137688e-05, "loss": 1.3559, "step": 540 }, { "epoch": 0.11, "learning_rate": 3.9269175992970055e-05, "loss": 1.334, "step": 541 }, { "epoch": 0.11, "learning_rate": 3.9265521569722176e-05, "loss": 1.3478, "step": 542 }, { "epoch": 0.11, "learning_rate": 3.926185820332965e-05, "loss": 1.3672, "step": 543 }, { "epoch": 0.11, "learning_rate": 3.9258185895493026e-05, "loss": 1.3822, "step": 544 }, { "epoch": 0.11, "learning_rate": 3.925450464791701e-05, "loss": 1.3705, "step": 545 }, { "epoch": 0.11, "learning_rate": 3.925081446231045e-05, "loss": 1.3478, "step": 546 }, { "epoch": 0.12, "learning_rate": 3.924711534038635e-05, "loss": 1.3549, "step": 547 }, { "epoch": 0.12, "learning_rate": 3.9243407283861866e-05, "loss": 1.2949, "step": 548 }, { "epoch": 0.12, "learning_rate": 3.923969029445828e-05, "loss": 1.3545, "step": 549 }, { "epoch": 0.12, "learning_rate": 3.923596437390105e-05, "loss": 1.355, "step": 550 }, { "epoch": 0.12, "learning_rate": 3.923222952391975e-05, "loss": 1.2885, "step": 551 }, { "epoch": 0.12, "learning_rate": 3.9228485746248134e-05, "loss": 1.3509, "step": 552 }, { "epoch": 0.12, "learning_rate": 3.922473304262406e-05, "loss": 1.3077, "step": 553 }, { "epoch": 0.12, "learning_rate": 3.922097141478957e-05, "loss": 1.3247, "step": 554 }, { "epoch": 0.12, "learning_rate": 3.921720086449082e-05, "loss": 1.3522, "step": 555 }, { "epoch": 0.12, "learning_rate": 3.921342139347811e-05, "loss": 1.354, "step": 556 }, { "epoch": 0.12, "learning_rate": 3.92096330035059e-05, "loss": 1.3082, "step": 557 }, { "epoch": 0.12, "learning_rate": 3.9205835696332775e-05, "loss": 1.358, "step": 558 }, { "epoch": 0.12, "learning_rate": 3.920202947372146e-05, "loss": 1.3389, "step": 559 }, { "epoch": 0.12, "learning_rate": 3.919821433743882e-05, "loss": 1.3166, "step": 560 }, { "epoch": 0.12, "learning_rate": 3.919439028925587e-05, "loss": 1.3377, "step": 561 }, { "epoch": 0.12, "learning_rate": 3.919055733094774e-05, "loss": 1.3287, "step": 562 }, { "epoch": 0.12, "learning_rate": 3.91867154642937e-05, "loss": 1.3511, "step": 563 }, { "epoch": 0.12, "learning_rate": 3.918286469107718e-05, "loss": 1.3573, "step": 564 }, { "epoch": 0.12, "learning_rate": 3.917900501308572e-05, "loss": 1.3141, "step": 565 }, { "epoch": 0.12, "learning_rate": 3.9175136432111e-05, "loss": 1.3611, "step": 566 }, { "epoch": 0.12, "learning_rate": 3.9171258949948827e-05, "loss": 1.3282, "step": 567 }, { "epoch": 0.12, "learning_rate": 3.916737256839916e-05, "loss": 1.3067, "step": 568 }, { "epoch": 0.12, "learning_rate": 3.916347728926606e-05, "loss": 1.3499, "step": 569 }, { "epoch": 0.12, "learning_rate": 3.915957311435774e-05, "loss": 1.2912, "step": 570 }, { "epoch": 0.12, "learning_rate": 3.915566004548654e-05, "loss": 1.3769, "step": 571 }, { "epoch": 0.12, "learning_rate": 3.915173808446892e-05, "loss": 1.2684, "step": 572 }, { "epoch": 0.12, "learning_rate": 3.914780723312548e-05, "loss": 1.3661, "step": 573 }, { "epoch": 0.12, "learning_rate": 3.914386749328093e-05, "loss": 1.3749, "step": 574 }, { "epoch": 0.12, "learning_rate": 3.913991886676412e-05, "loss": 1.3324, "step": 575 }, { "epoch": 0.12, "learning_rate": 3.9135961355408024e-05, "loss": 1.2841, "step": 576 }, { "epoch": 0.12, "learning_rate": 3.913199496104972e-05, "loss": 1.3472, "step": 577 }, { "epoch": 0.12, "learning_rate": 3.912801968553045e-05, "loss": 1.317, "step": 578 }, { "epoch": 0.12, "learning_rate": 3.9124035530695546e-05, "loss": 1.3709, "step": 579 }, { "epoch": 0.12, "learning_rate": 3.912004249839447e-05, "loss": 1.3045, "step": 580 }, { "epoch": 0.12, "learning_rate": 3.91160405904808e-05, "loss": 1.3511, "step": 581 }, { "epoch": 0.12, "learning_rate": 3.911202980881226e-05, "loss": 1.3596, "step": 582 }, { "epoch": 0.12, "learning_rate": 3.910801015525064e-05, "loss": 1.3468, "step": 583 }, { "epoch": 0.12, "learning_rate": 3.910398163166192e-05, "loss": 1.3168, "step": 584 }, { "epoch": 0.12, "learning_rate": 3.909994423991614e-05, "loss": 1.3635, "step": 585 }, { "epoch": 0.12, "learning_rate": 3.909589798188747e-05, "loss": 1.3195, "step": 586 }, { "epoch": 0.12, "learning_rate": 3.909184285945421e-05, "loss": 1.3587, "step": 587 }, { "epoch": 0.12, "learning_rate": 3.908777887449877e-05, "loss": 1.3586, "step": 588 }, { "epoch": 0.12, "learning_rate": 3.9083706028907665e-05, "loss": 1.3146, "step": 589 }, { "epoch": 0.12, "learning_rate": 3.9079624324571536e-05, "loss": 1.3445, "step": 590 }, { "epoch": 0.12, "learning_rate": 3.9075533763385116e-05, "loss": 1.4044, "step": 591 }, { "epoch": 0.12, "learning_rate": 3.9071434347247275e-05, "loss": 1.3535, "step": 592 }, { "epoch": 0.12, "learning_rate": 3.906732607806098e-05, "loss": 1.2993, "step": 593 }, { "epoch": 0.12, "learning_rate": 3.906320895773329e-05, "loss": 1.3229, "step": 594 }, { "epoch": 0.13, "learning_rate": 3.905908298817543e-05, "loss": 1.353, "step": 595 }, { "epoch": 0.13, "learning_rate": 3.905494817130265e-05, "loss": 1.3275, "step": 596 }, { "epoch": 0.13, "learning_rate": 3.9050804509034383e-05, "loss": 1.3252, "step": 597 }, { "epoch": 0.13, "learning_rate": 3.904665200329411e-05, "loss": 1.3534, "step": 598 }, { "epoch": 0.13, "learning_rate": 3.904249065600948e-05, "loss": 1.365, "step": 599 }, { "epoch": 0.13, "learning_rate": 3.903832046911218e-05, "loss": 1.3479, "step": 600 }, { "epoch": 0.13, "learning_rate": 3.9034141444538034e-05, "loss": 1.3279, "step": 601 }, { "epoch": 0.13, "learning_rate": 3.902995358422697e-05, "loss": 1.3512, "step": 602 }, { "epoch": 0.13, "learning_rate": 3.902575689012301e-05, "loss": 1.332, "step": 603 }, { "epoch": 0.13, "learning_rate": 3.9021551364174286e-05, "loss": 1.3107, "step": 604 }, { "epoch": 0.13, "learning_rate": 3.901733700833301e-05, "loss": 1.2952, "step": 605 }, { "epoch": 0.13, "learning_rate": 3.9013113824555515e-05, "loss": 1.2863, "step": 606 }, { "epoch": 0.13, "learning_rate": 3.9008881814802225e-05, "loss": 1.3365, "step": 607 }, { "epoch": 0.13, "learning_rate": 3.900464098103765e-05, "loss": 1.348, "step": 608 }, { "epoch": 0.13, "learning_rate": 3.9000391325230405e-05, "loss": 1.3923, "step": 609 }, { "epoch": 0.13, "learning_rate": 3.899613284935321e-05, "loss": 1.3236, "step": 610 }, { "epoch": 0.13, "learning_rate": 3.899186555538286e-05, "loss": 1.346, "step": 611 }, { "epoch": 0.13, "learning_rate": 3.898758944530025e-05, "loss": 1.3396, "step": 612 }, { "epoch": 0.13, "learning_rate": 3.898330452109038e-05, "loss": 1.3161, "step": 613 }, { "epoch": 0.13, "learning_rate": 3.897901078474233e-05, "loss": 1.3269, "step": 614 }, { "epoch": 0.13, "learning_rate": 3.897470823824927e-05, "loss": 1.3353, "step": 615 }, { "epoch": 0.13, "learning_rate": 3.897039688360845e-05, "loss": 1.3457, "step": 616 }, { "epoch": 0.13, "learning_rate": 3.8966076722821245e-05, "loss": 1.393, "step": 617 }, { "epoch": 0.13, "learning_rate": 3.8961747757893075e-05, "loss": 1.3907, "step": 618 }, { "epoch": 0.13, "learning_rate": 3.895740999083347e-05, "loss": 1.3242, "step": 619 }, { "epoch": 0.13, "learning_rate": 3.8953063423656055e-05, "loss": 1.2941, "step": 620 }, { "epoch": 0.13, "learning_rate": 3.8948708058378504e-05, "loss": 1.3333, "step": 621 }, { "epoch": 0.13, "learning_rate": 3.894434389702261e-05, "loss": 1.2923, "step": 622 }, { "epoch": 0.13, "learning_rate": 3.8939970941614247e-05, "loss": 1.3301, "step": 623 }, { "epoch": 0.13, "learning_rate": 3.893558919418334e-05, "loss": 1.332, "step": 624 }, { "epoch": 0.13, "learning_rate": 3.893119865676393e-05, "loss": 1.3211, "step": 625 }, { "epoch": 0.13, "learning_rate": 3.892679933139412e-05, "loss": 1.3203, "step": 626 }, { "epoch": 0.13, "learning_rate": 3.8922391220116094e-05, "loss": 1.3262, "step": 627 }, { "epoch": 0.13, "learning_rate": 3.891797432497613e-05, "loss": 1.325, "step": 628 }, { "epoch": 0.13, "learning_rate": 3.891354864802455e-05, "loss": 1.3373, "step": 629 }, { "epoch": 0.13, "learning_rate": 3.89091141913158e-05, "loss": 1.3773, "step": 630 }, { "epoch": 0.13, "learning_rate": 3.890467095690837e-05, "loss": 1.3327, "step": 631 }, { "epoch": 0.13, "learning_rate": 3.890021894686481e-05, "loss": 1.32, "step": 632 }, { "epoch": 0.13, "learning_rate": 3.8895758163251783e-05, "loss": 1.3501, "step": 633 }, { "epoch": 0.13, "learning_rate": 3.889128860814e-05, "loss": 1.3465, "step": 634 }, { "epoch": 0.13, "learning_rate": 3.8886810283604245e-05, "loss": 1.3617, "step": 635 }, { "epoch": 0.13, "learning_rate": 3.888232319172338e-05, "loss": 1.3613, "step": 636 }, { "epoch": 0.13, "learning_rate": 3.887782733458034e-05, "loss": 1.313, "step": 637 }, { "epoch": 0.13, "learning_rate": 3.887332271426211e-05, "loss": 1.3819, "step": 638 }, { "epoch": 0.13, "learning_rate": 3.886880933285977e-05, "loss": 1.3359, "step": 639 }, { "epoch": 0.13, "learning_rate": 3.886428719246845e-05, "loss": 1.2789, "step": 640 }, { "epoch": 0.13, "learning_rate": 3.885975629518734e-05, "loss": 1.3289, "step": 641 }, { "epoch": 0.14, "learning_rate": 3.8855216643119697e-05, "loss": 1.389, "step": 642 }, { "epoch": 0.14, "learning_rate": 3.885066823837287e-05, "loss": 1.3445, "step": 643 }, { "epoch": 0.14, "learning_rate": 3.884611108305824e-05, "loss": 1.2976, "step": 644 }, { "epoch": 0.14, "learning_rate": 3.8841545179291254e-05, "loss": 1.3225, "step": 645 }, { "epoch": 0.14, "learning_rate": 3.883697052919143e-05, "loss": 1.3322, "step": 646 }, { "epoch": 0.14, "learning_rate": 3.883238713488235e-05, "loss": 1.3157, "step": 647 }, { "epoch": 0.14, "learning_rate": 3.882779499849163e-05, "loss": 1.3124, "step": 648 }, { "epoch": 0.14, "learning_rate": 3.8823194122150975e-05, "loss": 1.3327, "step": 649 }, { "epoch": 0.14, "learning_rate": 3.881858450799612e-05, "loss": 1.3617, "step": 650 }, { "epoch": 0.14, "learning_rate": 3.8813966158166894e-05, "loss": 1.3349, "step": 651 }, { "epoch": 0.14, "learning_rate": 3.8809339074807125e-05, "loss": 1.3099, "step": 652 }, { "epoch": 0.14, "learning_rate": 3.8804703260064756e-05, "loss": 1.3449, "step": 653 }, { "epoch": 0.14, "learning_rate": 3.880005871609173e-05, "loss": 1.3623, "step": 654 }, { "epoch": 0.14, "learning_rate": 3.879540544504408e-05, "loss": 1.3599, "step": 655 }, { "epoch": 0.14, "learning_rate": 3.879074344908187e-05, "loss": 1.3152, "step": 656 }, { "epoch": 0.14, "learning_rate": 3.878607273036922e-05, "loss": 1.3683, "step": 657 }, { "epoch": 0.14, "learning_rate": 3.8781393291074296e-05, "loss": 1.3504, "step": 658 }, { "epoch": 0.14, "learning_rate": 3.8776705133369333e-05, "loss": 1.3143, "step": 659 }, { "epoch": 0.14, "learning_rate": 3.8772008259430575e-05, "loss": 1.3358, "step": 660 }, { "epoch": 0.14, "learning_rate": 3.876730267143834e-05, "loss": 1.3297, "step": 661 }, { "epoch": 0.14, "learning_rate": 3.876258837157699e-05, "loss": 1.3036, "step": 662 }, { "epoch": 0.14, "learning_rate": 3.8757865362034914e-05, "loss": 1.3259, "step": 663 }, { "epoch": 0.14, "learning_rate": 3.875313364500456e-05, "loss": 1.2908, "step": 664 }, { "epoch": 0.14, "learning_rate": 3.8748393222682425e-05, "loss": 1.3005, "step": 665 }, { "epoch": 0.14, "learning_rate": 3.874364409726901e-05, "loss": 1.3045, "step": 666 }, { "epoch": 0.14, "learning_rate": 3.87388862709689e-05, "loss": 1.3331, "step": 667 }, { "epoch": 0.14, "learning_rate": 3.8734119745990696e-05, "loss": 1.3348, "step": 668 }, { "epoch": 0.14, "learning_rate": 3.872934452454704e-05, "loss": 1.3533, "step": 669 }, { "epoch": 0.14, "learning_rate": 3.872456060885461e-05, "loss": 1.288, "step": 670 }, { "epoch": 0.14, "learning_rate": 3.8719768001134124e-05, "loss": 1.3604, "step": 671 }, { "epoch": 0.14, "learning_rate": 3.871496670361033e-05, "loss": 1.3343, "step": 672 }, { "epoch": 0.14, "learning_rate": 3.871015671851202e-05, "loss": 1.3566, "step": 673 }, { "epoch": 0.14, "learning_rate": 3.870533804807201e-05, "loss": 1.354, "step": 674 }, { "epoch": 0.14, "learning_rate": 3.870051069452714e-05, "loss": 1.3175, "step": 675 }, { "epoch": 0.14, "learning_rate": 3.8695674660118294e-05, "loss": 1.3104, "step": 676 }, { "epoch": 0.14, "learning_rate": 3.8690829947090386e-05, "loss": 1.3424, "step": 677 }, { "epoch": 0.14, "learning_rate": 3.868597655769235e-05, "loss": 1.3718, "step": 678 }, { "epoch": 0.14, "learning_rate": 3.868111449417716e-05, "loss": 1.3294, "step": 679 }, { "epoch": 0.14, "learning_rate": 3.867624375880179e-05, "loss": 1.2584, "step": 680 }, { "epoch": 0.14, "learning_rate": 3.8671364353827284e-05, "loss": 1.369, "step": 681 }, { "epoch": 0.14, "learning_rate": 3.8666476281518665e-05, "loss": 1.3192, "step": 682 }, { "epoch": 0.14, "learning_rate": 3.8661579544145e-05, "loss": 1.3639, "step": 683 }, { "epoch": 0.14, "learning_rate": 3.8656674143979386e-05, "loss": 1.2207, "step": 684 }, { "epoch": 0.14, "learning_rate": 3.8651760083298926e-05, "loss": 1.3189, "step": 685 }, { "epoch": 0.14, "learning_rate": 3.864683736438475e-05, "loss": 1.3861, "step": 686 }, { "epoch": 0.14, "learning_rate": 3.8641905989522016e-05, "loss": 1.3206, "step": 687 }, { "epoch": 0.14, "learning_rate": 3.863696596099988e-05, "loss": 1.317, "step": 688 }, { "epoch": 0.14, "learning_rate": 3.863201728111153e-05, "loss": 1.3675, "step": 689 }, { "epoch": 0.15, "learning_rate": 3.862705995215417e-05, "loss": 1.3608, "step": 690 }, { "epoch": 0.15, "learning_rate": 3.862209397642901e-05, "loss": 1.3174, "step": 691 }, { "epoch": 0.15, "learning_rate": 3.861711935624129e-05, "loss": 1.3038, "step": 692 }, { "epoch": 0.15, "learning_rate": 3.8612136093900224e-05, "loss": 1.2865, "step": 693 }, { "epoch": 0.15, "learning_rate": 3.860714419171909e-05, "loss": 1.3025, "step": 694 }, { "epoch": 0.15, "learning_rate": 3.860214365201515e-05, "loss": 1.3254, "step": 695 }, { "epoch": 0.15, "learning_rate": 3.8597134477109674e-05, "loss": 1.2729, "step": 696 }, { "epoch": 0.15, "learning_rate": 3.8592116669327945e-05, "loss": 1.3217, "step": 697 }, { "epoch": 0.15, "learning_rate": 3.858709023099925e-05, "loss": 1.3261, "step": 698 }, { "epoch": 0.15, "learning_rate": 3.858205516445689e-05, "loss": 1.3223, "step": 699 }, { "epoch": 0.15, "learning_rate": 3.857701147203816e-05, "loss": 1.3326, "step": 700 }, { "epoch": 0.15, "learning_rate": 3.857195915608437e-05, "loss": 1.3546, "step": 701 }, { "epoch": 0.15, "learning_rate": 3.8566898218940825e-05, "loss": 1.344, "step": 702 }, { "epoch": 0.15, "learning_rate": 3.856182866295684e-05, "loss": 1.3246, "step": 703 }, { "epoch": 0.15, "learning_rate": 3.8556750490485724e-05, "loss": 1.3234, "step": 704 }, { "epoch": 0.15, "learning_rate": 3.855166370388479e-05, "loss": 1.3433, "step": 705 }, { "epoch": 0.15, "learning_rate": 3.8546568305515345e-05, "loss": 1.3348, "step": 706 }, { "epoch": 0.15, "learning_rate": 3.85414642977427e-05, "loss": 1.3573, "step": 707 }, { "epoch": 0.15, "learning_rate": 3.8536351682936155e-05, "loss": 1.3409, "step": 708 }, { "epoch": 0.15, "learning_rate": 3.8531230463469015e-05, "loss": 1.3084, "step": 709 }, { "epoch": 0.15, "learning_rate": 3.852610064171857e-05, "loss": 1.3788, "step": 710 }, { "epoch": 0.15, "learning_rate": 3.85209622200661e-05, "loss": 1.3381, "step": 711 }, { "epoch": 0.15, "learning_rate": 3.8515815200896905e-05, "loss": 1.3015, "step": 712 }, { "epoch": 0.15, "learning_rate": 3.851065958660023e-05, "loss": 1.3084, "step": 713 }, { "epoch": 0.15, "learning_rate": 3.8505495379569354e-05, "loss": 1.3429, "step": 714 }, { "epoch": 0.15, "learning_rate": 3.850032258220152e-05, "loss": 1.3328, "step": 715 }, { "epoch": 0.15, "learning_rate": 3.849514119689796e-05, "loss": 1.3462, "step": 716 }, { "epoch": 0.15, "learning_rate": 3.84899512260639e-05, "loss": 1.2932, "step": 717 }, { "epoch": 0.15, "learning_rate": 3.848475267210856e-05, "loss": 1.3455, "step": 718 }, { "epoch": 0.15, "learning_rate": 3.8479545537445115e-05, "loss": 1.348, "step": 719 }, { "epoch": 0.15, "learning_rate": 3.847432982449075e-05, "loss": 1.3449, "step": 720 }, { "epoch": 0.15, "learning_rate": 3.846910553566662e-05, "loss": 1.2975, "step": 721 }, { "epoch": 0.15, "learning_rate": 3.846387267339787e-05, "loss": 1.3452, "step": 722 }, { "epoch": 0.15, "learning_rate": 3.845863124011361e-05, "loss": 1.3705, "step": 723 }, { "epoch": 0.15, "learning_rate": 3.845338123824694e-05, "loss": 1.3662, "step": 724 }, { "epoch": 0.15, "learning_rate": 3.844812267023495e-05, "loss": 1.3419, "step": 725 }, { "epoch": 0.15, "learning_rate": 3.8442855538518667e-05, "loss": 1.3088, "step": 726 }, { "epoch": 0.15, "learning_rate": 3.8437579845543133e-05, "loss": 1.2827, "step": 727 }, { "epoch": 0.15, "learning_rate": 3.843229559375735e-05, "loss": 1.2962, "step": 728 }, { "epoch": 0.15, "learning_rate": 3.842700278561429e-05, "loss": 1.2687, "step": 729 }, { "epoch": 0.15, "learning_rate": 3.8421701423570895e-05, "loss": 1.2588, "step": 730 }, { "epoch": 0.15, "learning_rate": 3.841639151008809e-05, "loss": 1.2686, "step": 731 }, { "epoch": 0.15, "learning_rate": 3.8411073047630745e-05, "loss": 1.3598, "step": 732 }, { "epoch": 0.15, "learning_rate": 3.840574603866774e-05, "loss": 1.309, "step": 733 }, { "epoch": 0.15, "learning_rate": 3.840041048567188e-05, "loss": 1.3183, "step": 734 }, { "epoch": 0.15, "learning_rate": 3.839506639111996e-05, "loss": 1.3085, "step": 735 }, { "epoch": 0.15, "learning_rate": 3.838971375749272e-05, "loss": 1.315, "step": 736 }, { "epoch": 0.16, "learning_rate": 3.83843525872749e-05, "loss": 1.3152, "step": 737 }, { "epoch": 0.16, "learning_rate": 3.837898288295516e-05, "loss": 1.2853, "step": 738 }, { "epoch": 0.16, "learning_rate": 3.837360464702616e-05, "loss": 1.3565, "step": 739 }, { "epoch": 0.16, "learning_rate": 3.8368217881984484e-05, "loss": 1.2939, "step": 740 }, { "epoch": 0.16, "learning_rate": 3.83628225903307e-05, "loss": 1.3342, "step": 741 }, { "epoch": 0.16, "learning_rate": 3.8357418774569335e-05, "loss": 1.2597, "step": 742 }, { "epoch": 0.16, "learning_rate": 3.835200643720886e-05, "loss": 1.2928, "step": 743 }, { "epoch": 0.16, "learning_rate": 3.8346585580761705e-05, "loss": 1.3252, "step": 744 }, { "epoch": 0.16, "learning_rate": 3.8341156207744254e-05, "loss": 1.3273, "step": 745 }, { "epoch": 0.16, "learning_rate": 3.833571832067685e-05, "loss": 1.3518, "step": 746 }, { "epoch": 0.16, "learning_rate": 3.8330271922083795e-05, "loss": 1.2859, "step": 747 }, { "epoch": 0.16, "learning_rate": 3.8324817014493326e-05, "loss": 1.3529, "step": 748 }, { "epoch": 0.16, "learning_rate": 3.831935360043763e-05, "loss": 1.3298, "step": 749 }, { "epoch": 0.16, "learning_rate": 3.8313881682452854e-05, "loss": 1.2983, "step": 750 }, { "epoch": 0.16, "learning_rate": 3.830840126307909e-05, "loss": 1.3388, "step": 751 }, { "epoch": 0.16, "learning_rate": 3.830291234486037e-05, "loss": 1.355, "step": 752 }, { "epoch": 0.16, "learning_rate": 3.8297414930344684e-05, "loss": 1.3197, "step": 753 }, { "epoch": 0.16, "learning_rate": 3.829190902208394e-05, "loss": 1.3258, "step": 754 }, { "epoch": 0.16, "learning_rate": 3.828639462263403e-05, "loss": 1.3177, "step": 755 }, { "epoch": 0.16, "learning_rate": 3.8280871734554746e-05, "loss": 1.3312, "step": 756 }, { "epoch": 0.16, "learning_rate": 3.827534036040984e-05, "loss": 1.3159, "step": 757 }, { "epoch": 0.16, "learning_rate": 3.8269800502767e-05, "loss": 1.3111, "step": 758 }, { "epoch": 0.16, "learning_rate": 3.8264252164197866e-05, "loss": 1.2511, "step": 759 }, { "epoch": 0.16, "learning_rate": 3.825869534727799e-05, "loss": 1.3241, "step": 760 }, { "epoch": 0.16, "learning_rate": 3.8253130054586886e-05, "loss": 1.2966, "step": 761 }, { "epoch": 0.16, "learning_rate": 3.824755628870797e-05, "loss": 1.3275, "step": 762 }, { "epoch": 0.16, "learning_rate": 3.824197405222863e-05, "loss": 1.3324, "step": 763 }, { "epoch": 0.16, "learning_rate": 3.8236383347740146e-05, "loss": 1.3197, "step": 764 }, { "epoch": 0.16, "learning_rate": 3.823078417783777e-05, "loss": 1.3572, "step": 765 }, { "epoch": 0.16, "learning_rate": 3.8225176545120646e-05, "loss": 1.3027, "step": 766 }, { "epoch": 0.16, "learning_rate": 3.821956045219186e-05, "loss": 1.3135, "step": 767 }, { "epoch": 0.16, "learning_rate": 3.821393590165845e-05, "loss": 1.3586, "step": 768 }, { "epoch": 0.16, "learning_rate": 3.8208302896131344e-05, "loss": 1.2514, "step": 769 }, { "epoch": 0.16, "learning_rate": 3.820266143822541e-05, "loss": 1.3219, "step": 770 }, { "epoch": 0.16, "learning_rate": 3.819701153055944e-05, "loss": 1.3377, "step": 771 }, { "epoch": 0.16, "learning_rate": 3.8191353175756145e-05, "loss": 1.3107, "step": 772 }, { "epoch": 0.16, "learning_rate": 3.818568637644217e-05, "loss": 1.36, "step": 773 }, { "epoch": 0.16, "learning_rate": 3.8180011135248055e-05, "loss": 1.3692, "step": 774 }, { "epoch": 0.16, "learning_rate": 3.8174327454808275e-05, "loss": 1.3231, "step": 775 }, { "epoch": 0.16, "learning_rate": 3.816863533776124e-05, "loss": 1.3213, "step": 776 }, { "epoch": 0.16, "learning_rate": 3.816293478674923e-05, "loss": 1.3449, "step": 777 }, { "epoch": 0.16, "learning_rate": 3.815722580441849e-05, "loss": 1.3097, "step": 778 }, { "epoch": 0.16, "learning_rate": 3.815150839341915e-05, "loss": 1.3468, "step": 779 }, { "epoch": 0.16, "learning_rate": 3.8145782556405244e-05, "loss": 1.3213, "step": 780 }, { "epoch": 0.16, "learning_rate": 3.814004829603475e-05, "loss": 1.3149, "step": 781 }, { "epoch": 0.16, "learning_rate": 3.813430561496953e-05, "loss": 1.3353, "step": 782 }, { "epoch": 0.16, "learning_rate": 3.812855451587537e-05, "loss": 1.3114, "step": 783 }, { "epoch": 0.16, "learning_rate": 3.812279500142194e-05, "loss": 1.3304, "step": 784 }, { "epoch": 0.17, "learning_rate": 3.811702707428285e-05, "loss": 1.3388, "step": 785 }, { "epoch": 0.17, "learning_rate": 3.81112507371356e-05, "loss": 1.2986, "step": 786 }, { "epoch": 0.17, "learning_rate": 3.810546599266158e-05, "loss": 1.3354, "step": 787 }, { "epoch": 0.17, "learning_rate": 3.8099672843546106e-05, "loss": 1.3486, "step": 788 }, { "epoch": 0.17, "learning_rate": 3.809387129247838e-05, "loss": 1.3586, "step": 789 }, { "epoch": 0.17, "learning_rate": 3.808806134215151e-05, "loss": 1.3622, "step": 790 }, { "epoch": 0.17, "learning_rate": 3.80822429952625e-05, "loss": 1.3256, "step": 791 }, { "epoch": 0.17, "learning_rate": 3.8076416254512256e-05, "loss": 1.3196, "step": 792 }, { "epoch": 0.17, "learning_rate": 3.807058112260558e-05, "loss": 1.2956, "step": 793 }, { "epoch": 0.17, "learning_rate": 3.8064737602251155e-05, "loss": 1.3438, "step": 794 }, { "epoch": 0.17, "learning_rate": 3.8058885696161595e-05, "loss": 1.34, "step": 795 }, { "epoch": 0.17, "learning_rate": 3.805302540705335e-05, "loss": 1.3227, "step": 796 }, { "epoch": 0.17, "learning_rate": 3.8047156737646825e-05, "loss": 1.3718, "step": 797 }, { "epoch": 0.17, "learning_rate": 3.8041279690666254e-05, "loss": 1.2667, "step": 798 }, { "epoch": 0.17, "learning_rate": 3.803539426883982e-05, "loss": 1.302, "step": 799 }, { "epoch": 0.17, "learning_rate": 3.8029500474899544e-05, "loss": 1.341, "step": 800 }, { "epoch": 0.17, "learning_rate": 3.802359831158135e-05, "loss": 1.343, "step": 801 }, { "epoch": 0.17, "learning_rate": 3.801768778162506e-05, "loss": 1.3368, "step": 802 }, { "epoch": 0.17, "learning_rate": 3.8011768887774365e-05, "loss": 1.3268, "step": 803 }, { "epoch": 0.17, "learning_rate": 3.800584163277684e-05, "loss": 1.3112, "step": 804 }, { "epoch": 0.17, "learning_rate": 3.7999906019383954e-05, "loss": 1.3869, "step": 805 }, { "epoch": 0.17, "learning_rate": 3.799396205035104e-05, "loss": 1.3264, "step": 806 }, { "epoch": 0.17, "learning_rate": 3.7988009728437304e-05, "loss": 1.3573, "step": 807 }, { "epoch": 0.17, "learning_rate": 3.7982049056405866e-05, "loss": 1.3474, "step": 808 }, { "epoch": 0.17, "learning_rate": 3.797608003702368e-05, "loss": 1.3319, "step": 809 }, { "epoch": 0.17, "learning_rate": 3.79701026730616e-05, "loss": 1.3339, "step": 810 }, { "epoch": 0.17, "learning_rate": 3.796411696729434e-05, "loss": 1.272, "step": 811 }, { "epoch": 0.17, "learning_rate": 3.79581229225005e-05, "loss": 1.3628, "step": 812 }, { "epoch": 0.17, "learning_rate": 3.795212054146254e-05, "loss": 1.3212, "step": 813 }, { "epoch": 0.17, "learning_rate": 3.794610982696679e-05, "loss": 1.3288, "step": 814 }, { "epoch": 0.17, "learning_rate": 3.7940090781803454e-05, "loss": 1.2961, "step": 815 }, { "epoch": 0.17, "learning_rate": 3.7934063408766606e-05, "loss": 1.3084, "step": 816 }, { "epoch": 0.17, "learning_rate": 3.792802771065417e-05, "loss": 1.328, "step": 817 }, { "epoch": 0.17, "learning_rate": 3.792198369026796e-05, "loss": 1.2938, "step": 818 }, { "epoch": 0.17, "learning_rate": 3.791593135041362e-05, "loss": 1.3381, "step": 819 }, { "epoch": 0.17, "learning_rate": 3.790987069390069e-05, "loss": 1.2808, "step": 820 }, { "epoch": 0.17, "learning_rate": 3.790380172354255e-05, "loss": 1.2853, "step": 821 }, { "epoch": 0.17, "learning_rate": 3.789772444215644e-05, "loss": 1.3335, "step": 822 }, { "epoch": 0.17, "learning_rate": 3.7891638852563455e-05, "loss": 1.3278, "step": 823 }, { "epoch": 0.17, "learning_rate": 3.788554495758858e-05, "loss": 1.2816, "step": 824 }, { "epoch": 0.17, "learning_rate": 3.7879442760060604e-05, "loss": 1.3327, "step": 825 }, { "epoch": 0.17, "learning_rate": 3.78733322628122e-05, "loss": 1.3372, "step": 826 }, { "epoch": 0.17, "learning_rate": 3.786721346867991e-05, "loss": 1.347, "step": 827 }, { "epoch": 0.17, "learning_rate": 3.786108638050408e-05, "loss": 1.3139, "step": 828 }, { "epoch": 0.17, "learning_rate": 3.785495100112894e-05, "loss": 1.3048, "step": 829 }, { "epoch": 0.17, "learning_rate": 3.784880733340257e-05, "loss": 1.3011, "step": 830 }, { "epoch": 0.17, "learning_rate": 3.784265538017689e-05, "loss": 1.3368, "step": 831 }, { "epoch": 0.18, "learning_rate": 3.7836495144307644e-05, "loss": 1.2558, "step": 832 }, { "epoch": 0.18, "learning_rate": 3.783032662865447e-05, "loss": 1.3549, "step": 833 }, { "epoch": 0.18, "learning_rate": 3.782414983608081e-05, "loss": 1.3048, "step": 834 }, { "epoch": 0.18, "learning_rate": 3.7817964769453956e-05, "loss": 1.326, "step": 835 }, { "epoch": 0.18, "learning_rate": 3.781177143164505e-05, "loss": 1.3329, "step": 836 }, { "epoch": 0.18, "learning_rate": 3.7805569825529055e-05, "loss": 1.32, "step": 837 }, { "epoch": 0.18, "learning_rate": 3.779935995398481e-05, "loss": 1.3174, "step": 838 }, { "epoch": 0.18, "learning_rate": 3.7793141819894955e-05, "loss": 1.3625, "step": 839 }, { "epoch": 0.18, "learning_rate": 3.778691542614596e-05, "loss": 1.3777, "step": 840 }, { "epoch": 0.18, "learning_rate": 3.778068077562817e-05, "loss": 1.3298, "step": 841 }, { "epoch": 0.18, "learning_rate": 3.7774437871235724e-05, "loss": 1.3167, "step": 842 }, { "epoch": 0.18, "learning_rate": 3.776818671586662e-05, "loss": 1.2919, "step": 843 }, { "epoch": 0.18, "learning_rate": 3.776192731242265e-05, "loss": 1.3498, "step": 844 }, { "epoch": 0.18, "learning_rate": 3.775565966380949e-05, "loss": 1.3421, "step": 845 }, { "epoch": 0.18, "learning_rate": 3.774938377293659e-05, "loss": 1.3182, "step": 846 }, { "epoch": 0.18, "learning_rate": 3.774309964271725e-05, "loss": 1.3455, "step": 847 }, { "epoch": 0.18, "learning_rate": 3.7736807276068604e-05, "loss": 1.325, "step": 848 }, { "epoch": 0.18, "learning_rate": 3.773050667591158e-05, "loss": 1.338, "step": 849 }, { "epoch": 0.18, "learning_rate": 3.772419784517095e-05, "loss": 1.31, "step": 850 }, { "epoch": 0.18, "learning_rate": 3.771788078677532e-05, "loss": 1.3155, "step": 851 }, { "epoch": 0.18, "learning_rate": 3.771155550365708e-05, "loss": 1.2947, "step": 852 }, { "epoch": 0.18, "learning_rate": 3.770522199875247e-05, "loss": 1.3655, "step": 853 }, { "epoch": 0.18, "learning_rate": 3.7698880275001516e-05, "loss": 1.3295, "step": 854 }, { "epoch": 0.18, "learning_rate": 3.769253033534808e-05, "loss": 1.3509, "step": 855 }, { "epoch": 0.18, "learning_rate": 3.7686172182739845e-05, "loss": 1.3094, "step": 856 }, { "epoch": 0.18, "learning_rate": 3.767980582012828e-05, "loss": 1.2749, "step": 857 }, { "epoch": 0.18, "learning_rate": 3.7673431250468695e-05, "loss": 1.3423, "step": 858 }, { "epoch": 0.18, "learning_rate": 3.766704847672018e-05, "loss": 1.3708, "step": 859 }, { "epoch": 0.18, "learning_rate": 3.766065750184566e-05, "loss": 1.3212, "step": 860 }, { "epoch": 0.18, "learning_rate": 3.7654258328811856e-05, "loss": 1.3406, "step": 861 }, { "epoch": 0.18, "learning_rate": 3.764785096058927e-05, "loss": 1.317, "step": 862 }, { "epoch": 0.18, "learning_rate": 3.764143540015227e-05, "loss": 1.2996, "step": 863 }, { "epoch": 0.18, "learning_rate": 3.763501165047896e-05, "loss": 1.3164, "step": 864 }, { "epoch": 0.18, "learning_rate": 3.7628579714551285e-05, "loss": 1.3208, "step": 865 }, { "epoch": 0.18, "learning_rate": 3.7622139595354976e-05, "loss": 1.3605, "step": 866 }, { "epoch": 0.18, "learning_rate": 3.7615691295879574e-05, "loss": 1.3146, "step": 867 }, { "epoch": 0.18, "learning_rate": 3.76092348191184e-05, "loss": 1.3372, "step": 868 }, { "epoch": 0.18, "learning_rate": 3.7602770168068586e-05, "loss": 1.302, "step": 869 }, { "epoch": 0.18, "learning_rate": 3.759629734573105e-05, "loss": 1.3072, "step": 870 }, { "epoch": 0.18, "learning_rate": 3.758981635511051e-05, "loss": 1.3293, "step": 871 }, { "epoch": 0.18, "learning_rate": 3.758332719921547e-05, "loss": 1.3123, "step": 872 }, { "epoch": 0.18, "learning_rate": 3.757682988105823e-05, "loss": 1.3352, "step": 873 }, { "epoch": 0.18, "learning_rate": 3.7570324403654866e-05, "loss": 1.2952, "step": 874 }, { "epoch": 0.18, "learning_rate": 3.756381077002526e-05, "loss": 1.3695, "step": 875 }, { "epoch": 0.18, "learning_rate": 3.755728898319306e-05, "loss": 1.3138, "step": 876 }, { "epoch": 0.18, "learning_rate": 3.7550759046185726e-05, "loss": 1.4168, "step": 877 }, { "epoch": 0.18, "learning_rate": 3.7544220962034475e-05, "loss": 1.3155, "step": 878 }, { "epoch": 0.18, "learning_rate": 3.7537674733774315e-05, "loss": 1.2633, "step": 879 }, { "epoch": 0.19, "learning_rate": 3.753112036444404e-05, "loss": 1.3867, "step": 880 }, { "epoch": 0.19, "learning_rate": 3.752455785708622e-05, "loss": 1.3262, "step": 881 }, { "epoch": 0.19, "learning_rate": 3.7517987214747186e-05, "loss": 1.3172, "step": 882 }, { "epoch": 0.19, "learning_rate": 3.751140844047708e-05, "loss": 1.3271, "step": 883 }, { "epoch": 0.19, "learning_rate": 3.7504821537329795e-05, "loss": 1.304, "step": 884 }, { "epoch": 0.19, "learning_rate": 3.7498226508362996e-05, "loss": 1.3103, "step": 885 }, { "epoch": 0.19, "learning_rate": 3.749162335663813e-05, "loss": 1.3819, "step": 886 }, { "epoch": 0.19, "learning_rate": 3.7485012085220416e-05, "loss": 1.3204, "step": 887 }, { "epoch": 0.19, "learning_rate": 3.747839269717882e-05, "loss": 1.3505, "step": 888 }, { "epoch": 0.19, "learning_rate": 3.7471765195586115e-05, "loss": 1.3184, "step": 889 }, { "epoch": 0.19, "learning_rate": 3.74651295835188e-05, "loss": 1.3156, "step": 890 }, { "epoch": 0.19, "learning_rate": 3.745848586405717e-05, "loss": 1.311, "step": 891 }, { "epoch": 0.19, "learning_rate": 3.745183404028525e-05, "loss": 1.306, "step": 892 }, { "epoch": 0.19, "learning_rate": 3.7445174115290875e-05, "loss": 1.3066, "step": 893 }, { "epoch": 0.19, "learning_rate": 3.74385060921656e-05, "loss": 1.3126, "step": 894 }, { "epoch": 0.19, "learning_rate": 3.743182997400475e-05, "loss": 1.2545, "step": 895 }, { "epoch": 0.19, "learning_rate": 3.742514576390741e-05, "loss": 1.372, "step": 896 }, { "epoch": 0.19, "learning_rate": 3.741845346497643e-05, "loss": 1.2692, "step": 897 }, { "epoch": 0.19, "learning_rate": 3.741175308031839e-05, "loss": 1.2986, "step": 898 }, { "epoch": 0.19, "learning_rate": 3.740504461304366e-05, "loss": 1.3442, "step": 899 }, { "epoch": 0.19, "learning_rate": 3.739832806626632e-05, "loss": 1.2773, "step": 900 }, { "epoch": 0.19, "learning_rate": 3.7391603443104244e-05, "loss": 1.2644, "step": 901 }, { "epoch": 0.19, "learning_rate": 3.738487074667902e-05, "loss": 1.3268, "step": 902 }, { "epoch": 0.19, "learning_rate": 3.7378129980116e-05, "loss": 1.3084, "step": 903 }, { "epoch": 0.19, "learning_rate": 3.7371381146544276e-05, "loss": 1.3093, "step": 904 }, { "epoch": 0.19, "learning_rate": 3.736462424909669e-05, "loss": 1.2696, "step": 905 }, { "epoch": 0.19, "learning_rate": 3.735785929090983e-05, "loss": 1.2589, "step": 906 }, { "epoch": 0.19, "learning_rate": 3.7351086275124023e-05, "loss": 1.2937, "step": 907 }, { "epoch": 0.19, "learning_rate": 3.7344305204883326e-05, "loss": 1.36, "step": 908 }, { "epoch": 0.19, "learning_rate": 3.7337516083335536e-05, "loss": 1.3503, "step": 909 }, { "epoch": 0.19, "learning_rate": 3.7330718913632215e-05, "loss": 1.3143, "step": 910 }, { "epoch": 0.19, "learning_rate": 3.732391369892862e-05, "loss": 1.2691, "step": 911 }, { "epoch": 0.19, "learning_rate": 3.731710044238378e-05, "loss": 1.3538, "step": 912 }, { "epoch": 0.19, "learning_rate": 3.731027914716044e-05, "loss": 1.2826, "step": 913 }, { "epoch": 0.19, "learning_rate": 3.7303449816425066e-05, "loss": 1.3439, "step": 914 }, { "epoch": 0.19, "learning_rate": 3.729661245334787e-05, "loss": 1.3079, "step": 915 }, { "epoch": 0.19, "learning_rate": 3.728976706110278e-05, "loss": 1.2925, "step": 916 }, { "epoch": 0.19, "learning_rate": 3.7282913642867484e-05, "loss": 1.3079, "step": 917 }, { "epoch": 0.19, "learning_rate": 3.727605220182334e-05, "loss": 1.3148, "step": 918 }, { "epoch": 0.19, "learning_rate": 3.726918274115548e-05, "loss": 1.2858, "step": 919 }, { "epoch": 0.19, "learning_rate": 3.726230526405273e-05, "loss": 1.332, "step": 920 }, { "epoch": 0.19, "learning_rate": 3.725541977370765e-05, "loss": 1.3153, "step": 921 }, { "epoch": 0.19, "learning_rate": 3.7248526273316524e-05, "loss": 1.266, "step": 922 }, { "epoch": 0.19, "learning_rate": 3.724162476607933e-05, "loss": 1.3352, "step": 923 }, { "epoch": 0.19, "learning_rate": 3.72347152551998e-05, "loss": 1.2768, "step": 924 }, { "epoch": 0.19, "learning_rate": 3.722779774388535e-05, "loss": 1.283, "step": 925 }, { "epoch": 0.19, "learning_rate": 3.722087223534711e-05, "loss": 1.2801, "step": 926 }, { "epoch": 0.19, "learning_rate": 3.721393873279996e-05, "loss": 1.3038, "step": 927 }, { "epoch": 0.2, "learning_rate": 3.720699723946244e-05, "loss": 1.3247, "step": 928 }, { "epoch": 0.2, "learning_rate": 3.720004775855684e-05, "loss": 1.2946, "step": 929 }, { "epoch": 0.2, "learning_rate": 3.719309029330912e-05, "loss": 1.2982, "step": 930 }, { "epoch": 0.2, "learning_rate": 3.7186124846948995e-05, "loss": 1.2819, "step": 931 }, { "epoch": 0.2, "learning_rate": 3.7179151422709845e-05, "loss": 1.3325, "step": 932 }, { "epoch": 0.2, "learning_rate": 3.717217002382875e-05, "loss": 1.3033, "step": 933 }, { "epoch": 0.2, "learning_rate": 3.716518065354654e-05, "loss": 1.3273, "step": 934 }, { "epoch": 0.2, "learning_rate": 3.715818331510769e-05, "loss": 1.314, "step": 935 }, { "epoch": 0.2, "learning_rate": 3.71511780117604e-05, "loss": 1.3593, "step": 936 }, { "epoch": 0.2, "learning_rate": 3.714416474675657e-05, "loss": 1.3345, "step": 937 }, { "epoch": 0.2, "learning_rate": 3.7137143523351787e-05, "loss": 1.3185, "step": 938 }, { "epoch": 0.2, "learning_rate": 3.713011434480534e-05, "loss": 1.2981, "step": 939 }, { "epoch": 0.2, "learning_rate": 3.71230772143802e-05, "loss": 1.2702, "step": 940 }, { "epoch": 0.2, "learning_rate": 3.711603213534303e-05, "loss": 1.346, "step": 941 }, { "epoch": 0.2, "learning_rate": 3.710897911096421e-05, "loss": 1.2751, "step": 942 }, { "epoch": 0.2, "learning_rate": 3.710191814451777e-05, "loss": 1.307, "step": 943 }, { "epoch": 0.2, "learning_rate": 3.7094849239281444e-05, "loss": 1.3397, "step": 944 }, { "epoch": 0.2, "learning_rate": 3.7087772398536656e-05, "loss": 1.3408, "step": 945 }, { "epoch": 0.2, "learning_rate": 3.70806876255685e-05, "loss": 1.2815, "step": 946 }, { "epoch": 0.2, "learning_rate": 3.7073594923665774e-05, "loss": 1.3294, "step": 947 }, { "epoch": 0.2, "learning_rate": 3.7066494296120935e-05, "loss": 1.3056, "step": 948 }, { "epoch": 0.2, "learning_rate": 3.705938574623012e-05, "loss": 1.3195, "step": 949 }, { "epoch": 0.2, "learning_rate": 3.705226927729317e-05, "loss": 1.3343, "step": 950 }, { "epoch": 0.2, "learning_rate": 3.704514489261357e-05, "loss": 1.2853, "step": 951 }, { "epoch": 0.2, "learning_rate": 3.703801259549848e-05, "loss": 1.3278, "step": 952 }, { "epoch": 0.2, "learning_rate": 3.7030872389258777e-05, "loss": 1.2837, "step": 953 }, { "epoch": 0.2, "learning_rate": 3.702372427720895e-05, "loss": 1.3047, "step": 954 }, { "epoch": 0.2, "learning_rate": 3.701656826266721e-05, "loss": 1.3364, "step": 955 }, { "epoch": 0.2, "learning_rate": 3.7009404348955385e-05, "loss": 1.3342, "step": 956 }, { "epoch": 0.2, "learning_rate": 3.7002232539399014e-05, "loss": 1.3222, "step": 957 }, { "epoch": 0.2, "learning_rate": 3.6995052837327274e-05, "loss": 1.335, "step": 958 }, { "epoch": 0.2, "learning_rate": 3.6987865246073035e-05, "loss": 1.3284, "step": 959 }, { "epoch": 0.2, "learning_rate": 3.6980669768972795e-05, "loss": 1.3216, "step": 960 }, { "epoch": 0.2, "learning_rate": 3.6973466409366735e-05, "loss": 1.3034, "step": 961 }, { "epoch": 0.2, "learning_rate": 3.696625517059868e-05, "loss": 1.3074, "step": 962 }, { "epoch": 0.2, "learning_rate": 3.695903605601612e-05, "loss": 1.3484, "step": 963 }, { "epoch": 0.2, "learning_rate": 3.695180906897021e-05, "loss": 1.3161, "step": 964 }, { "epoch": 0.2, "learning_rate": 3.694457421281575e-05, "loss": 1.3406, "step": 965 }, { "epoch": 0.2, "learning_rate": 3.693733149091119e-05, "loss": 1.3661, "step": 966 }, { "epoch": 0.2, "learning_rate": 3.693008090661864e-05, "loss": 1.2985, "step": 967 }, { "epoch": 0.2, "learning_rate": 3.6922822463303846e-05, "loss": 1.3351, "step": 968 }, { "epoch": 0.2, "learning_rate": 3.691555616433622e-05, "loss": 1.3145, "step": 969 }, { "epoch": 0.2, "learning_rate": 3.69082820130888e-05, "loss": 1.3249, "step": 970 }, { "epoch": 0.2, "learning_rate": 3.69010000129383e-05, "loss": 1.2538, "step": 971 }, { "epoch": 0.2, "learning_rate": 3.689371016726504e-05, "loss": 1.3523, "step": 972 }, { "epoch": 0.2, "learning_rate": 3.6886412479453004e-05, "loss": 1.3724, "step": 973 }, { "epoch": 0.2, "learning_rate": 3.6879106952889826e-05, "loss": 1.3468, "step": 974 }, { "epoch": 0.21, "learning_rate": 3.687179359096675e-05, "loss": 1.3211, "step": 975 }, { "epoch": 0.21, "learning_rate": 3.686447239707868e-05, "loss": 1.3299, "step": 976 }, { "epoch": 0.21, "learning_rate": 3.685714337462415e-05, "loss": 1.2972, "step": 977 }, { "epoch": 0.21, "learning_rate": 3.6849806527005316e-05, "loss": 1.2821, "step": 978 }, { "epoch": 0.21, "learning_rate": 3.6842461857627986e-05, "loss": 1.3079, "step": 979 }, { "epoch": 0.21, "learning_rate": 3.6835109369901586e-05, "loss": 1.2907, "step": 980 }, { "epoch": 0.21, "learning_rate": 3.682774906723918e-05, "loss": 1.2979, "step": 981 }, { "epoch": 0.21, "learning_rate": 3.6820380953057446e-05, "loss": 1.331, "step": 982 }, { "epoch": 0.21, "learning_rate": 3.681300503077671e-05, "loss": 1.2952, "step": 983 }, { "epoch": 0.21, "learning_rate": 3.680562130382089e-05, "loss": 1.2752, "step": 984 }, { "epoch": 0.21, "learning_rate": 3.679822977561756e-05, "loss": 1.3218, "step": 985 }, { "epoch": 0.21, "learning_rate": 3.67908304495979e-05, "loss": 1.3119, "step": 986 }, { "epoch": 0.21, "learning_rate": 3.678342332919671e-05, "loss": 1.281, "step": 987 }, { "epoch": 0.21, "learning_rate": 3.6776008417852415e-05, "loss": 1.2747, "step": 988 }, { "epoch": 0.21, "learning_rate": 3.676858571900704e-05, "loss": 1.3361, "step": 989 }, { "epoch": 0.21, "learning_rate": 3.6761155236106246e-05, "loss": 1.3624, "step": 990 }, { "epoch": 0.21, "learning_rate": 3.67537169725993e-05, "loss": 1.2927, "step": 991 }, { "epoch": 0.21, "learning_rate": 3.6746270931939064e-05, "loss": 1.3159, "step": 992 }, { "epoch": 0.21, "learning_rate": 3.6738817117582045e-05, "loss": 1.2895, "step": 993 }, { "epoch": 0.21, "learning_rate": 3.6731355532988315e-05, "loss": 1.3099, "step": 994 }, { "epoch": 0.21, "learning_rate": 3.6723886181621595e-05, "loss": 1.3087, "step": 995 }, { "epoch": 0.21, "learning_rate": 3.6716409066949184e-05, "loss": 1.2716, "step": 996 }, { "epoch": 0.21, "learning_rate": 3.670892419244199e-05, "loss": 1.2939, "step": 997 }, { "epoch": 0.21, "learning_rate": 3.670143156157454e-05, "loss": 1.3464, "step": 998 }, { "epoch": 0.21, "learning_rate": 3.6693931177824934e-05, "loss": 1.2778, "step": 999 }, { "epoch": 0.21, "learning_rate": 3.66864230446749e-05, "loss": 1.3137, "step": 1000 }, { "epoch": 0.21, "learning_rate": 3.667890716560973e-05, "loss": 1.308, "step": 1001 }, { "epoch": 0.21, "learning_rate": 3.667138354411834e-05, "loss": 1.3033, "step": 1002 }, { "epoch": 0.21, "learning_rate": 3.666385218369324e-05, "loss": 1.3094, "step": 1003 }, { "epoch": 0.21, "learning_rate": 3.6656313087830505e-05, "loss": 1.3138, "step": 1004 }, { "epoch": 0.21, "learning_rate": 3.664876626002982e-05, "loss": 1.3232, "step": 1005 }, { "epoch": 0.21, "learning_rate": 3.6641211703794466e-05, "loss": 1.331, "step": 1006 }, { "epoch": 0.21, "learning_rate": 3.66336494226313e-05, "loss": 1.322, "step": 1007 }, { "epoch": 0.21, "learning_rate": 3.662607942005077e-05, "loss": 1.3342, "step": 1008 }, { "epoch": 0.21, "learning_rate": 3.66185016995669e-05, "loss": 1.3037, "step": 1009 }, { "epoch": 0.21, "learning_rate": 3.661091626469731e-05, "loss": 1.3338, "step": 1010 }, { "epoch": 0.21, "learning_rate": 3.6603323118963194e-05, "loss": 1.2913, "step": 1011 }, { "epoch": 0.21, "learning_rate": 3.659572226588932e-05, "loss": 1.3209, "step": 1012 }, { "epoch": 0.21, "learning_rate": 3.658811370900404e-05, "loss": 1.2759, "step": 1013 }, { "epoch": 0.21, "learning_rate": 3.658049745183928e-05, "loss": 1.3323, "step": 1014 }, { "epoch": 0.21, "learning_rate": 3.657287349793056e-05, "loss": 1.3535, "step": 1015 }, { "epoch": 0.21, "learning_rate": 3.656524185081693e-05, "loss": 1.2524, "step": 1016 }, { "epoch": 0.21, "learning_rate": 3.655760251404105e-05, "loss": 1.3218, "step": 1017 }, { "epoch": 0.21, "learning_rate": 3.654995549114913e-05, "loss": 1.3505, "step": 1018 }, { "epoch": 0.21, "learning_rate": 3.6542300785690954e-05, "loss": 1.2841, "step": 1019 }, { "epoch": 0.21, "learning_rate": 3.6534638401219874e-05, "loss": 1.3382, "step": 1020 }, { "epoch": 0.21, "learning_rate": 3.652696834129281e-05, "loss": 1.3212, "step": 1021 }, { "epoch": 0.21, "learning_rate": 3.6519290609470225e-05, "loss": 1.3338, "step": 1022 }, { "epoch": 0.22, "learning_rate": 3.651160520931617e-05, "loss": 1.2861, "step": 1023 }, { "epoch": 0.22, "learning_rate": 3.650391214439825e-05, "loss": 1.2802, "step": 1024 }, { "epoch": 0.22, "learning_rate": 3.64962114182876e-05, "loss": 1.2893, "step": 1025 }, { "epoch": 0.22, "learning_rate": 3.648850303455895e-05, "loss": 1.3217, "step": 1026 }, { "epoch": 0.22, "learning_rate": 3.6480786996790554e-05, "loss": 1.3497, "step": 1027 }, { "epoch": 0.22, "learning_rate": 3.647306330856425e-05, "loss": 1.3243, "step": 1028 }, { "epoch": 0.22, "learning_rate": 3.646533197346539e-05, "loss": 1.298, "step": 1029 }, { "epoch": 0.22, "learning_rate": 3.6457592995082915e-05, "loss": 1.2769, "step": 1030 }, { "epoch": 0.22, "learning_rate": 3.644984637700928e-05, "loss": 1.33, "step": 1031 }, { "epoch": 0.22, "learning_rate": 3.6442092122840505e-05, "loss": 1.327, "step": 1032 }, { "epoch": 0.22, "learning_rate": 3.643433023617616e-05, "loss": 1.2911, "step": 1033 }, { "epoch": 0.22, "learning_rate": 3.642656072061933e-05, "loss": 1.2925, "step": 1034 }, { "epoch": 0.22, "learning_rate": 3.641878357977668e-05, "loss": 1.3223, "step": 1035 }, { "epoch": 0.22, "learning_rate": 3.641099881725839e-05, "loss": 1.3262, "step": 1036 }, { "epoch": 0.22, "learning_rate": 3.6403206436678173e-05, "loss": 1.3007, "step": 1037 }, { "epoch": 0.22, "learning_rate": 3.63954064416533e-05, "loss": 1.3166, "step": 1038 }, { "epoch": 0.22, "learning_rate": 3.6387598835804555e-05, "loss": 1.3546, "step": 1039 }, { "epoch": 0.22, "learning_rate": 3.6379783622756275e-05, "loss": 1.3309, "step": 1040 }, { "epoch": 0.22, "learning_rate": 3.6371960806136313e-05, "loss": 1.341, "step": 1041 }, { "epoch": 0.22, "learning_rate": 3.636413038957605e-05, "loss": 1.319, "step": 1042 }, { "epoch": 0.22, "learning_rate": 3.635629237671041e-05, "loss": 1.2956, "step": 1043 }, { "epoch": 0.22, "learning_rate": 3.634844677117784e-05, "loss": 1.3025, "step": 1044 }, { "epoch": 0.22, "learning_rate": 3.63405935766203e-05, "loss": 1.3476, "step": 1045 }, { "epoch": 0.22, "learning_rate": 3.633273279668327e-05, "loss": 1.3185, "step": 1046 }, { "epoch": 0.22, "learning_rate": 3.632486443501578e-05, "loss": 1.311, "step": 1047 }, { "epoch": 0.22, "learning_rate": 3.631698849527034e-05, "loss": 1.3141, "step": 1048 }, { "epoch": 0.22, "learning_rate": 3.630910498110302e-05, "loss": 1.3186, "step": 1049 }, { "epoch": 0.22, "learning_rate": 3.630121389617336e-05, "loss": 1.3305, "step": 1050 }, { "epoch": 0.22, "learning_rate": 3.629331524414446e-05, "loss": 1.3101, "step": 1051 }, { "epoch": 0.22, "learning_rate": 3.6285409028682895e-05, "loss": 1.2645, "step": 1052 }, { "epoch": 0.22, "learning_rate": 3.627749525345878e-05, "loss": 1.3466, "step": 1053 }, { "epoch": 0.22, "learning_rate": 3.626957392214571e-05, "loss": 1.3205, "step": 1054 }, { "epoch": 0.22, "learning_rate": 3.626164503842082e-05, "loss": 1.3256, "step": 1055 }, { "epoch": 0.22, "learning_rate": 3.6253708605964724e-05, "loss": 1.2844, "step": 1056 }, { "epoch": 0.22, "learning_rate": 3.6245764628461556e-05, "loss": 1.3179, "step": 1057 }, { "epoch": 0.22, "learning_rate": 3.6237813109598944e-05, "loss": 1.3443, "step": 1058 }, { "epoch": 0.22, "learning_rate": 3.622985405306803e-05, "loss": 1.2955, "step": 1059 }, { "epoch": 0.22, "learning_rate": 3.622188746256343e-05, "loss": 1.3075, "step": 1060 }, { "epoch": 0.22, "learning_rate": 3.621391334178328e-05, "loss": 1.3125, "step": 1061 }, { "epoch": 0.22, "learning_rate": 3.62059316944292e-05, "loss": 1.2941, "step": 1062 }, { "epoch": 0.22, "learning_rate": 3.619794252420632e-05, "loss": 1.2985, "step": 1063 }, { "epoch": 0.22, "learning_rate": 3.618994583482323e-05, "loss": 1.3151, "step": 1064 }, { "epoch": 0.22, "learning_rate": 3.618194162999205e-05, "loss": 1.296, "step": 1065 }, { "epoch": 0.22, "learning_rate": 3.617392991342836e-05, "loss": 1.3287, "step": 1066 }, { "epoch": 0.22, "learning_rate": 3.616591068885123e-05, "loss": 1.2997, "step": 1067 }, { "epoch": 0.22, "learning_rate": 3.6157883959983234e-05, "loss": 1.2501, "step": 1068 }, { "epoch": 0.22, "learning_rate": 3.614984973055041e-05, "loss": 1.3166, "step": 1069 }, { "epoch": 0.23, "learning_rate": 3.614180800428228e-05, "loss": 1.3497, "step": 1070 }, { "epoch": 0.23, "learning_rate": 3.6133758784911864e-05, "loss": 1.3219, "step": 1071 }, { "epoch": 0.23, "learning_rate": 3.6125702076175636e-05, "loss": 1.3273, "step": 1072 }, { "epoch": 0.23, "learning_rate": 3.611763788181356e-05, "loss": 1.2938, "step": 1073 }, { "epoch": 0.23, "learning_rate": 3.610956620556907e-05, "loss": 1.3263, "step": 1074 }, { "epoch": 0.23, "learning_rate": 3.610148705118908e-05, "loss": 1.3174, "step": 1075 }, { "epoch": 0.23, "learning_rate": 3.609340042242397e-05, "loss": 1.2726, "step": 1076 }, { "epoch": 0.23, "learning_rate": 3.6085306323027596e-05, "loss": 1.3284, "step": 1077 }, { "epoch": 0.23, "learning_rate": 3.607720475675727e-05, "loss": 1.3459, "step": 1078 }, { "epoch": 0.23, "learning_rate": 3.606909572737378e-05, "loss": 1.3363, "step": 1079 }, { "epoch": 0.23, "learning_rate": 3.6060979238641363e-05, "loss": 1.2867, "step": 1080 }, { "epoch": 0.23, "learning_rate": 3.6052855294327746e-05, "loss": 1.3195, "step": 1081 }, { "epoch": 0.23, "learning_rate": 3.604472389820409e-05, "loss": 1.2758, "step": 1082 }, { "epoch": 0.23, "learning_rate": 3.6036585054045044e-05, "loss": 1.3045, "step": 1083 }, { "epoch": 0.23, "learning_rate": 3.602843876562868e-05, "loss": 1.3325, "step": 1084 }, { "epoch": 0.23, "learning_rate": 3.6020285036736554e-05, "loss": 1.2989, "step": 1085 }, { "epoch": 0.23, "learning_rate": 3.601212387115366e-05, "loss": 1.3362, "step": 1086 }, { "epoch": 0.23, "learning_rate": 3.6003955272668444e-05, "loss": 1.314, "step": 1087 }, { "epoch": 0.23, "learning_rate": 3.5995779245072816e-05, "loss": 1.321, "step": 1088 }, { "epoch": 0.23, "learning_rate": 3.5987595792162126e-05, "loss": 1.3526, "step": 1089 }, { "epoch": 0.23, "learning_rate": 3.597940491773516e-05, "loss": 1.2579, "step": 1090 }, { "epoch": 0.23, "learning_rate": 3.5971206625594176e-05, "loss": 1.333, "step": 1091 }, { "epoch": 0.23, "learning_rate": 3.5963000919544844e-05, "loss": 1.3384, "step": 1092 }, { "epoch": 0.23, "learning_rate": 3.59547878033963e-05, "loss": 1.2867, "step": 1093 }, { "epoch": 0.23, "learning_rate": 3.594656728096111e-05, "loss": 1.3241, "step": 1094 }, { "epoch": 0.23, "learning_rate": 3.5938339356055274e-05, "loss": 1.3063, "step": 1095 }, { "epoch": 0.23, "learning_rate": 3.593010403249824e-05, "loss": 1.316, "step": 1096 }, { "epoch": 0.23, "learning_rate": 3.592186131411288e-05, "loss": 1.3018, "step": 1097 }, { "epoch": 0.23, "learning_rate": 3.5913611204725496e-05, "loss": 1.3229, "step": 1098 }, { "epoch": 0.23, "learning_rate": 3.590535370816584e-05, "loss": 1.3447, "step": 1099 }, { "epoch": 0.23, "learning_rate": 3.589708882826707e-05, "loss": 1.3017, "step": 1100 }, { "epoch": 0.23, "learning_rate": 3.588881656886578e-05, "loss": 1.2863, "step": 1101 }, { "epoch": 0.23, "learning_rate": 3.5880536933802e-05, "loss": 1.3163, "step": 1102 }, { "epoch": 0.23, "learning_rate": 3.587224992691917e-05, "loss": 1.3418, "step": 1103 }, { "epoch": 0.23, "learning_rate": 3.586395555206417e-05, "loss": 1.3082, "step": 1104 }, { "epoch": 0.23, "learning_rate": 3.585565381308726e-05, "loss": 1.2337, "step": 1105 }, { "epoch": 0.23, "learning_rate": 3.584734471384217e-05, "loss": 1.2875, "step": 1106 }, { "epoch": 0.23, "learning_rate": 3.5839028258186014e-05, "loss": 1.3158, "step": 1107 }, { "epoch": 0.23, "learning_rate": 3.583070444997932e-05, "loss": 1.3193, "step": 1108 }, { "epoch": 0.23, "learning_rate": 3.5822373293086055e-05, "loss": 1.3294, "step": 1109 }, { "epoch": 0.23, "learning_rate": 3.581403479137358e-05, "loss": 1.2849, "step": 1110 }, { "epoch": 0.23, "learning_rate": 3.580568894871265e-05, "loss": 1.3061, "step": 1111 }, { "epoch": 0.23, "learning_rate": 3.579733576897746e-05, "loss": 1.2782, "step": 1112 }, { "epoch": 0.23, "learning_rate": 3.578897525604558e-05, "loss": 1.3421, "step": 1113 }, { "epoch": 0.23, "learning_rate": 3.578060741379801e-05, "loss": 1.3636, "step": 1114 }, { "epoch": 0.23, "learning_rate": 3.577223224611915e-05, "loss": 1.2847, "step": 1115 }, { "epoch": 0.23, "learning_rate": 3.576384975689677e-05, "loss": 1.2856, "step": 1116 }, { "epoch": 0.23, "learning_rate": 3.575545995002207e-05, "loss": 1.3305, "step": 1117 }, { "epoch": 0.24, "learning_rate": 3.574706282938964e-05, "loss": 1.3576, "step": 1118 }, { "epoch": 0.24, "learning_rate": 3.573865839889746e-05, "loss": 1.3519, "step": 1119 }, { "epoch": 0.24, "learning_rate": 3.5730246662446916e-05, "loss": 1.2934, "step": 1120 }, { "epoch": 0.24, "learning_rate": 3.572182762394276e-05, "loss": 1.3416, "step": 1121 }, { "epoch": 0.24, "learning_rate": 3.571340128729315e-05, "loss": 1.2764, "step": 1122 }, { "epoch": 0.24, "learning_rate": 3.570496765640964e-05, "loss": 1.2825, "step": 1123 }, { "epoch": 0.24, "learning_rate": 3.569652673520715e-05, "loss": 1.3168, "step": 1124 }, { "epoch": 0.24, "learning_rate": 3.5688078527604e-05, "loss": 1.259, "step": 1125 }, { "epoch": 0.24, "learning_rate": 3.56796230375219e-05, "loss": 1.2608, "step": 1126 }, { "epoch": 0.24, "learning_rate": 3.567116026888591e-05, "loss": 1.3269, "step": 1127 }, { "epoch": 0.24, "learning_rate": 3.5662690225624484e-05, "loss": 1.2956, "step": 1128 }, { "epoch": 0.24, "learning_rate": 3.565421291166946e-05, "loss": 1.2896, "step": 1129 }, { "epoch": 0.24, "learning_rate": 3.5645728330956074e-05, "loss": 1.3015, "step": 1130 }, { "epoch": 0.24, "learning_rate": 3.563723648742286e-05, "loss": 1.3056, "step": 1131 }, { "epoch": 0.24, "learning_rate": 3.5628737385011814e-05, "loss": 1.2903, "step": 1132 }, { "epoch": 0.24, "learning_rate": 3.562023102766822e-05, "loss": 1.3046, "step": 1133 }, { "epoch": 0.24, "learning_rate": 3.561171741934081e-05, "loss": 1.3552, "step": 1134 }, { "epoch": 0.24, "learning_rate": 3.56031965639816e-05, "loss": 1.3289, "step": 1135 }, { "epoch": 0.24, "learning_rate": 3.559466846554604e-05, "loss": 1.3383, "step": 1136 }, { "epoch": 0.24, "learning_rate": 3.5586133127992904e-05, "loss": 1.3431, "step": 1137 }, { "epoch": 0.24, "learning_rate": 3.557759055528433e-05, "loss": 1.2967, "step": 1138 }, { "epoch": 0.24, "learning_rate": 3.5569040751385825e-05, "loss": 1.3167, "step": 1139 }, { "epoch": 0.24, "learning_rate": 3.556048372026625e-05, "loss": 1.2515, "step": 1140 }, { "epoch": 0.24, "learning_rate": 3.555191946589781e-05, "loss": 1.3347, "step": 1141 }, { "epoch": 0.24, "learning_rate": 3.554334799225608e-05, "loss": 1.3092, "step": 1142 }, { "epoch": 0.24, "learning_rate": 3.553476930331996e-05, "loss": 1.3033, "step": 1143 }, { "epoch": 0.24, "learning_rate": 3.5526183403071754e-05, "loss": 1.2948, "step": 1144 }, { "epoch": 0.24, "learning_rate": 3.551759029549705e-05, "loss": 1.3345, "step": 1145 }, { "epoch": 0.24, "learning_rate": 3.550898998458481e-05, "loss": 1.2931, "step": 1146 }, { "epoch": 0.24, "learning_rate": 3.550038247432734e-05, "loss": 1.3501, "step": 1147 }, { "epoch": 0.24, "learning_rate": 3.549176776872029e-05, "loss": 1.3781, "step": 1148 }, { "epoch": 0.24, "learning_rate": 3.5483145871762646e-05, "loss": 1.3182, "step": 1149 }, { "epoch": 0.24, "learning_rate": 3.547451678745673e-05, "loss": 1.3186, "step": 1150 }, { "epoch": 0.24, "learning_rate": 3.54658805198082e-05, "loss": 1.3037, "step": 1151 }, { "epoch": 0.24, "learning_rate": 3.545723707282606e-05, "loss": 1.2581, "step": 1152 }, { "epoch": 0.24, "learning_rate": 3.5448586450522635e-05, "loss": 1.3256, "step": 1153 }, { "epoch": 0.24, "learning_rate": 3.5439928656913586e-05, "loss": 1.3381, "step": 1154 }, { "epoch": 0.24, "learning_rate": 3.543126369601789e-05, "loss": 1.2692, "step": 1155 }, { "epoch": 0.24, "learning_rate": 3.542259157185787e-05, "loss": 1.3142, "step": 1156 }, { "epoch": 0.24, "learning_rate": 3.5413912288459174e-05, "loss": 1.3083, "step": 1157 }, { "epoch": 0.24, "learning_rate": 3.5405225849850754e-05, "loss": 1.3383, "step": 1158 }, { "epoch": 0.24, "learning_rate": 3.53965322600649e-05, "loss": 1.2736, "step": 1159 }, { "epoch": 0.24, "learning_rate": 3.5387831523137216e-05, "loss": 1.3476, "step": 1160 }, { "epoch": 0.24, "learning_rate": 3.5379123643106625e-05, "loss": 1.2945, "step": 1161 }, { "epoch": 0.24, "learning_rate": 3.5370408624015364e-05, "loss": 1.3525, "step": 1162 }, { "epoch": 0.24, "learning_rate": 3.536168646990899e-05, "loss": 1.3644, "step": 1163 }, { "epoch": 0.24, "learning_rate": 3.535295718483636e-05, "loss": 1.3243, "step": 1164 }, { "epoch": 0.25, "learning_rate": 3.5344220772849654e-05, "loss": 1.3574, "step": 1165 }, { "epoch": 0.25, "learning_rate": 3.533547723800435e-05, "loss": 1.3056, "step": 1166 }, { "epoch": 0.25, "learning_rate": 3.532672658435925e-05, "loss": 1.2936, "step": 1167 }, { "epoch": 0.25, "learning_rate": 3.531796881597643e-05, "loss": 1.3844, "step": 1168 }, { "epoch": 0.25, "learning_rate": 3.53092039369213e-05, "loss": 1.3335, "step": 1169 }, { "epoch": 0.25, "learning_rate": 3.530043195126255e-05, "loss": 1.2566, "step": 1170 }, { "epoch": 0.25, "learning_rate": 3.529165286307219e-05, "loss": 1.3402, "step": 1171 }, { "epoch": 0.25, "learning_rate": 3.528286667642549e-05, "loss": 1.314, "step": 1172 }, { "epoch": 0.25, "learning_rate": 3.527407339540106e-05, "loss": 1.3547, "step": 1173 }, { "epoch": 0.25, "learning_rate": 3.5265273024080776e-05, "loss": 1.291, "step": 1174 }, { "epoch": 0.25, "learning_rate": 3.52564655665498e-05, "loss": 1.3086, "step": 1175 }, { "epoch": 0.25, "learning_rate": 3.524765102689662e-05, "loss": 1.3185, "step": 1176 }, { "epoch": 0.25, "learning_rate": 3.523882940921296e-05, "loss": 1.3319, "step": 1177 }, { "epoch": 0.25, "learning_rate": 3.5230000717593865e-05, "loss": 1.299, "step": 1178 }, { "epoch": 0.25, "learning_rate": 3.522116495613766e-05, "loss": 1.3019, "step": 1179 }, { "epoch": 0.25, "learning_rate": 3.521232212894594e-05, "loss": 1.2854, "step": 1180 }, { "epoch": 0.25, "learning_rate": 3.5203472240123594e-05, "loss": 1.3305, "step": 1181 }, { "epoch": 0.25, "learning_rate": 3.519461529377877e-05, "loss": 1.308, "step": 1182 }, { "epoch": 0.25, "learning_rate": 3.5185751294022914e-05, "loss": 1.2531, "step": 1183 }, { "epoch": 0.25, "learning_rate": 3.5176880244970735e-05, "loss": 1.2869, "step": 1184 }, { "epoch": 0.25, "learning_rate": 3.516800215074021e-05, "loss": 1.3048, "step": 1185 }, { "epoch": 0.25, "learning_rate": 3.515911701545259e-05, "loss": 1.3049, "step": 1186 }, { "epoch": 0.25, "learning_rate": 3.5150224843232405e-05, "loss": 1.2604, "step": 1187 }, { "epoch": 0.25, "learning_rate": 3.514132563820744e-05, "loss": 1.3201, "step": 1188 }, { "epoch": 0.25, "learning_rate": 3.513241940450874e-05, "loss": 1.3027, "step": 1189 }, { "epoch": 0.25, "learning_rate": 3.512350614627062e-05, "loss": 1.2974, "step": 1190 }, { "epoch": 0.25, "learning_rate": 3.511458586763067e-05, "loss": 1.2961, "step": 1191 }, { "epoch": 0.25, "learning_rate": 3.510565857272972e-05, "loss": 1.2657, "step": 1192 }, { "epoch": 0.25, "learning_rate": 3.509672426571185e-05, "loss": 1.2957, "step": 1193 }, { "epoch": 0.25, "learning_rate": 3.508778295072441e-05, "loss": 1.3012, "step": 1194 }, { "epoch": 0.25, "learning_rate": 3.5078834631918014e-05, "loss": 1.3182, "step": 1195 }, { "epoch": 0.25, "learning_rate": 3.506987931344649e-05, "loss": 1.3301, "step": 1196 }, { "epoch": 0.25, "learning_rate": 3.506091699946697e-05, "loss": 1.2858, "step": 1197 }, { "epoch": 0.25, "learning_rate": 3.505194769413977e-05, "loss": 1.3322, "step": 1198 }, { "epoch": 0.25, "learning_rate": 3.504297140162851e-05, "loss": 1.2556, "step": 1199 }, { "epoch": 0.25, "learning_rate": 3.50339881261e-05, "loss": 1.2901, "step": 1200 }, { "epoch": 0.25, "learning_rate": 3.502499787172434e-05, "loss": 1.3396, "step": 1201 }, { "epoch": 0.25, "learning_rate": 3.5016000642674836e-05, "loss": 1.3205, "step": 1202 }, { "epoch": 0.25, "learning_rate": 3.500699644312805e-05, "loss": 1.3128, "step": 1203 }, { "epoch": 0.25, "learning_rate": 3.4997985277263765e-05, "loss": 1.304, "step": 1204 }, { "epoch": 0.25, "learning_rate": 3.498896714926502e-05, "loss": 1.3031, "step": 1205 }, { "epoch": 0.25, "learning_rate": 3.4979942063318066e-05, "loss": 1.333, "step": 1206 }, { "epoch": 0.25, "learning_rate": 3.497091002361238e-05, "loss": 1.3031, "step": 1207 }, { "epoch": 0.25, "learning_rate": 3.496187103434069e-05, "loss": 1.3285, "step": 1208 }, { "epoch": 0.25, "learning_rate": 3.4952825099698926e-05, "loss": 1.3237, "step": 1209 }, { "epoch": 0.25, "learning_rate": 3.4943772223886264e-05, "loss": 1.3202, "step": 1210 }, { "epoch": 0.25, "learning_rate": 3.493471241110507e-05, "loss": 1.3213, "step": 1211 }, { "epoch": 0.25, "learning_rate": 3.492564566556098e-05, "loss": 1.3025, "step": 1212 }, { "epoch": 0.26, "learning_rate": 3.491657199146281e-05, "loss": 1.314, "step": 1213 }, { "epoch": 0.26, "learning_rate": 3.490749139302258e-05, "loss": 1.3774, "step": 1214 }, { "epoch": 0.26, "learning_rate": 3.4898403874455584e-05, "loss": 1.3192, "step": 1215 }, { "epoch": 0.26, "learning_rate": 3.4889309439980256e-05, "loss": 1.2976, "step": 1216 }, { "epoch": 0.26, "learning_rate": 3.488020809381829e-05, "loss": 1.334, "step": 1217 }, { "epoch": 0.26, "learning_rate": 3.4871099840194575e-05, "loss": 1.29, "step": 1218 }, { "epoch": 0.26, "learning_rate": 3.4861984683337205e-05, "loss": 1.2634, "step": 1219 }, { "epoch": 0.26, "learning_rate": 3.485286262747747e-05, "loss": 1.2728, "step": 1220 }, { "epoch": 0.26, "learning_rate": 3.4843733676849876e-05, "loss": 1.3374, "step": 1221 }, { "epoch": 0.26, "learning_rate": 3.4834597835692117e-05, "loss": 1.2964, "step": 1222 }, { "epoch": 0.26, "learning_rate": 3.482545510824511e-05, "loss": 1.3318, "step": 1223 }, { "epoch": 0.26, "learning_rate": 3.481630549875293e-05, "loss": 1.2775, "step": 1224 }, { "epoch": 0.26, "learning_rate": 3.480714901146289e-05, "loss": 1.335, "step": 1225 }, { "epoch": 0.26, "learning_rate": 3.479798565062546e-05, "loss": 1.2692, "step": 1226 }, { "epoch": 0.26, "learning_rate": 3.478881542049432e-05, "loss": 1.3023, "step": 1227 }, { "epoch": 0.26, "learning_rate": 3.4779638325326326e-05, "loss": 1.3615, "step": 1228 }, { "epoch": 0.26, "learning_rate": 3.477045436938154e-05, "loss": 1.3302, "step": 1229 }, { "epoch": 0.26, "learning_rate": 3.476126355692318e-05, "loss": 1.3058, "step": 1230 }, { "epoch": 0.26, "learning_rate": 3.475206589221768e-05, "loss": 1.2731, "step": 1231 }, { "epoch": 0.26, "learning_rate": 3.4742861379534636e-05, "loss": 1.3252, "step": 1232 }, { "epoch": 0.26, "learning_rate": 3.473365002314682e-05, "loss": 1.3084, "step": 1233 }, { "epoch": 0.26, "learning_rate": 3.4724431827330196e-05, "loss": 1.2977, "step": 1234 }, { "epoch": 0.26, "learning_rate": 3.4715206796363876e-05, "loss": 1.2764, "step": 1235 }, { "epoch": 0.26, "learning_rate": 3.470597493453018e-05, "loss": 1.31, "step": 1236 }, { "epoch": 0.26, "learning_rate": 3.469673624611457e-05, "loss": 1.2913, "step": 1237 }, { "epoch": 0.26, "learning_rate": 3.4687490735405696e-05, "loss": 1.2798, "step": 1238 }, { "epoch": 0.26, "learning_rate": 3.467823840669536e-05, "loss": 1.346, "step": 1239 }, { "epoch": 0.26, "learning_rate": 3.466897926427854e-05, "loss": 1.2654, "step": 1240 }, { "epoch": 0.26, "learning_rate": 3.465971331245337e-05, "loss": 1.302, "step": 1241 }, { "epoch": 0.26, "learning_rate": 3.465044055552116e-05, "loss": 1.2815, "step": 1242 }, { "epoch": 0.26, "learning_rate": 3.464116099778634e-05, "loss": 1.2751, "step": 1243 }, { "epoch": 0.26, "learning_rate": 3.463187464355655e-05, "loss": 1.3094, "step": 1244 }, { "epoch": 0.26, "learning_rate": 3.462258149714255e-05, "loss": 1.3194, "step": 1245 }, { "epoch": 0.26, "learning_rate": 3.461328156285826e-05, "loss": 1.2753, "step": 1246 }, { "epoch": 0.26, "learning_rate": 3.4603974845020754e-05, "loss": 1.3322, "step": 1247 }, { "epoch": 0.26, "learning_rate": 3.4594661347950255e-05, "loss": 1.372, "step": 1248 }, { "epoch": 0.26, "learning_rate": 3.458534107597013e-05, "loss": 1.311, "step": 1249 }, { "epoch": 0.26, "learning_rate": 3.457601403340689e-05, "loss": 1.3296, "step": 1250 }, { "epoch": 0.26, "learning_rate": 3.45666802245902e-05, "loss": 1.317, "step": 1251 }, { "epoch": 0.26, "learning_rate": 3.455733965385284e-05, "loss": 1.2959, "step": 1252 }, { "epoch": 0.26, "learning_rate": 3.454799232553077e-05, "loss": 1.3007, "step": 1253 }, { "epoch": 0.26, "learning_rate": 3.4538638243963045e-05, "loss": 1.2638, "step": 1254 }, { "epoch": 0.26, "learning_rate": 3.4529277413491885e-05, "loss": 1.3071, "step": 1255 }, { "epoch": 0.26, "learning_rate": 3.451990983846262e-05, "loss": 1.3229, "step": 1256 }, { "epoch": 0.26, "learning_rate": 3.451053552322373e-05, "loss": 1.3249, "step": 1257 }, { "epoch": 0.26, "learning_rate": 3.4501154472126815e-05, "loss": 1.3593, "step": 1258 }, { "epoch": 0.26, "learning_rate": 3.4491766689526596e-05, "loss": 1.2989, "step": 1259 }, { "epoch": 0.27, "learning_rate": 3.448237217978093e-05, "loss": 1.2947, "step": 1260 }, { "epoch": 0.27, "learning_rate": 3.4472970947250794e-05, "loss": 1.2887, "step": 1261 }, { "epoch": 0.27, "learning_rate": 3.446356299630028e-05, "loss": 1.341, "step": 1262 }, { "epoch": 0.27, "learning_rate": 3.4454148331296606e-05, "loss": 1.3018, "step": 1263 }, { "epoch": 0.27, "learning_rate": 3.44447269566101e-05, "loss": 1.3284, "step": 1264 }, { "epoch": 0.27, "learning_rate": 3.443529887661421e-05, "loss": 1.3196, "step": 1265 }, { "epoch": 0.27, "learning_rate": 3.44258640956855e-05, "loss": 1.3603, "step": 1266 }, { "epoch": 0.27, "learning_rate": 3.441642261820363e-05, "loss": 1.2977, "step": 1267 }, { "epoch": 0.27, "learning_rate": 3.440697444855137e-05, "loss": 1.3293, "step": 1268 }, { "epoch": 0.27, "learning_rate": 3.439751959111463e-05, "loss": 1.3461, "step": 1269 }, { "epoch": 0.27, "learning_rate": 3.438805805028238e-05, "loss": 1.3195, "step": 1270 }, { "epoch": 0.27, "learning_rate": 3.4378589830446714e-05, "loss": 1.2863, "step": 1271 }, { "epoch": 0.27, "learning_rate": 3.436911493600282e-05, "loss": 1.2776, "step": 1272 }, { "epoch": 0.27, "learning_rate": 3.4359633371349e-05, "loss": 1.3062, "step": 1273 }, { "epoch": 0.27, "learning_rate": 3.435014514088662e-05, "loss": 1.2985, "step": 1274 }, { "epoch": 0.27, "learning_rate": 3.434065024902019e-05, "loss": 1.2872, "step": 1275 }, { "epoch": 0.27, "learning_rate": 3.4331148700157263e-05, "loss": 1.2782, "step": 1276 }, { "epoch": 0.27, "learning_rate": 3.43216404987085e-05, "loss": 1.3008, "step": 1277 }, { "epoch": 0.27, "learning_rate": 3.4312125649087664e-05, "loss": 1.2953, "step": 1278 }, { "epoch": 0.27, "learning_rate": 3.430260415571158e-05, "loss": 1.3191, "step": 1279 }, { "epoch": 0.27, "learning_rate": 3.429307602300019e-05, "loss": 1.2763, "step": 1280 }, { "epoch": 0.27, "learning_rate": 3.4283541255376466e-05, "loss": 1.2801, "step": 1281 }, { "epoch": 0.27, "learning_rate": 3.427399985726652e-05, "loss": 1.3344, "step": 1282 }, { "epoch": 0.27, "learning_rate": 3.42644518330995e-05, "loss": 1.2885, "step": 1283 }, { "epoch": 0.27, "learning_rate": 3.425489718730765e-05, "loss": 1.3324, "step": 1284 }, { "epoch": 0.27, "learning_rate": 3.4245335924326274e-05, "loss": 1.3043, "step": 1285 }, { "epoch": 0.27, "learning_rate": 3.423576804859375e-05, "loss": 1.2856, "step": 1286 }, { "epoch": 0.27, "learning_rate": 3.422619356455154e-05, "loss": 1.3455, "step": 1287 }, { "epoch": 0.27, "learning_rate": 3.421661247664417e-05, "loss": 1.3051, "step": 1288 }, { "epoch": 0.27, "learning_rate": 3.420702478931921e-05, "loss": 1.3006, "step": 1289 }, { "epoch": 0.27, "learning_rate": 3.419743050702732e-05, "loss": 1.2823, "step": 1290 }, { "epoch": 0.27, "learning_rate": 3.4187829634222205e-05, "loss": 1.3047, "step": 1291 }, { "epoch": 0.27, "learning_rate": 3.417822217536064e-05, "loss": 1.3242, "step": 1292 }, { "epoch": 0.27, "learning_rate": 3.4168608134902443e-05, "loss": 1.2822, "step": 1293 }, { "epoch": 0.27, "learning_rate": 3.4158987517310506e-05, "loss": 1.2905, "step": 1294 }, { "epoch": 0.27, "learning_rate": 3.414936032705075e-05, "loss": 1.3127, "step": 1295 }, { "epoch": 0.27, "learning_rate": 3.413972656859218e-05, "loss": 1.3153, "step": 1296 }, { "epoch": 0.27, "learning_rate": 3.413008624640683e-05, "loss": 1.3078, "step": 1297 }, { "epoch": 0.27, "learning_rate": 3.4120439364969766e-05, "loss": 1.2792, "step": 1298 }, { "epoch": 0.27, "learning_rate": 3.411078592875912e-05, "loss": 1.3039, "step": 1299 }, { "epoch": 0.27, "learning_rate": 3.410112594225607e-05, "loss": 1.2918, "step": 1300 }, { "epoch": 0.27, "learning_rate": 3.4091459409944836e-05, "loss": 1.2701, "step": 1301 }, { "epoch": 0.27, "learning_rate": 3.408178633631265e-05, "loss": 1.2595, "step": 1302 }, { "epoch": 0.27, "learning_rate": 3.407210672584979e-05, "loss": 1.3252, "step": 1303 }, { "epoch": 0.27, "learning_rate": 3.40624205830496e-05, "loss": 1.2767, "step": 1304 }, { "epoch": 0.27, "learning_rate": 3.4052727912408414e-05, "loss": 1.3054, "step": 1305 }, { "epoch": 0.27, "learning_rate": 3.404302871842563e-05, "loss": 1.3169, "step": 1306 }, { "epoch": 0.27, "learning_rate": 3.403332300560364e-05, "loss": 1.2939, "step": 1307 }, { "epoch": 0.28, "learning_rate": 3.4023610778447895e-05, "loss": 1.3022, "step": 1308 }, { "epoch": 0.28, "learning_rate": 3.401389204146685e-05, "loss": 1.3017, "step": 1309 }, { "epoch": 0.28, "learning_rate": 3.4004166799172004e-05, "loss": 1.3318, "step": 1310 }, { "epoch": 0.28, "learning_rate": 3.3994435056077827e-05, "loss": 1.3287, "step": 1311 }, { "epoch": 0.28, "learning_rate": 3.3984696816701865e-05, "loss": 1.2773, "step": 1312 }, { "epoch": 0.28, "learning_rate": 3.397495208556465e-05, "loss": 1.3318, "step": 1313 }, { "epoch": 0.28, "learning_rate": 3.3965200867189734e-05, "loss": 1.3425, "step": 1314 }, { "epoch": 0.28, "learning_rate": 3.395544316610367e-05, "loss": 1.3127, "step": 1315 }, { "epoch": 0.28, "learning_rate": 3.394567898683602e-05, "loss": 1.2811, "step": 1316 }, { "epoch": 0.28, "learning_rate": 3.3935908333919385e-05, "loss": 1.3013, "step": 1317 }, { "epoch": 0.28, "learning_rate": 3.3926131211889336e-05, "loss": 1.2883, "step": 1318 }, { "epoch": 0.28, "learning_rate": 3.391634762528445e-05, "loss": 1.2957, "step": 1319 }, { "epoch": 0.28, "learning_rate": 3.3906557578646317e-05, "loss": 1.2667, "step": 1320 }, { "epoch": 0.28, "learning_rate": 3.389676107651953e-05, "loss": 1.3282, "step": 1321 }, { "epoch": 0.28, "learning_rate": 3.388695812345168e-05, "loss": 1.3189, "step": 1322 }, { "epoch": 0.28, "learning_rate": 3.3877148723993306e-05, "loss": 1.3242, "step": 1323 }, { "epoch": 0.28, "learning_rate": 3.3867332882698016e-05, "loss": 1.2828, "step": 1324 }, { "epoch": 0.28, "learning_rate": 3.385751060412235e-05, "loss": 1.2589, "step": 1325 }, { "epoch": 0.28, "learning_rate": 3.3847681892825865e-05, "loss": 1.287, "step": 1326 }, { "epoch": 0.28, "learning_rate": 3.383784675337108e-05, "loss": 1.3118, "step": 1327 }, { "epoch": 0.28, "learning_rate": 3.382800519032353e-05, "loss": 1.3099, "step": 1328 }, { "epoch": 0.28, "learning_rate": 3.38181572082517e-05, "loss": 1.2927, "step": 1329 }, { "epoch": 0.28, "learning_rate": 3.3808302811727074e-05, "loss": 1.3071, "step": 1330 }, { "epoch": 0.28, "learning_rate": 3.379844200532411e-05, "loss": 1.3211, "step": 1331 }, { "epoch": 0.28, "learning_rate": 3.378857479362024e-05, "loss": 1.3449, "step": 1332 }, { "epoch": 0.28, "learning_rate": 3.377870118119587e-05, "loss": 1.2573, "step": 1333 }, { "epoch": 0.28, "learning_rate": 3.376882117263437e-05, "loss": 1.3084, "step": 1334 }, { "epoch": 0.28, "learning_rate": 3.37589347725221e-05, "loss": 1.2562, "step": 1335 }, { "epoch": 0.28, "learning_rate": 3.374904198544836e-05, "loss": 1.3295, "step": 1336 }, { "epoch": 0.28, "learning_rate": 3.373914281600544e-05, "loss": 1.3231, "step": 1337 }, { "epoch": 0.28, "learning_rate": 3.372923726878856e-05, "loss": 1.3315, "step": 1338 }, { "epoch": 0.28, "learning_rate": 3.371932534839594e-05, "loss": 1.3093, "step": 1339 }, { "epoch": 0.28, "learning_rate": 3.370940705942874e-05, "loss": 1.2672, "step": 1340 }, { "epoch": 0.28, "learning_rate": 3.369948240649106e-05, "loss": 1.3012, "step": 1341 }, { "epoch": 0.28, "learning_rate": 3.368955139418998e-05, "loss": 1.3134, "step": 1342 }, { "epoch": 0.28, "learning_rate": 3.367961402713553e-05, "loss": 1.3119, "step": 1343 }, { "epoch": 0.28, "learning_rate": 3.3669670309940663e-05, "loss": 1.2915, "step": 1344 }, { "epoch": 0.28, "learning_rate": 3.365972024722131e-05, "loss": 1.2518, "step": 1345 }, { "epoch": 0.28, "learning_rate": 3.3649763843596334e-05, "loss": 1.2958, "step": 1346 }, { "epoch": 0.28, "learning_rate": 3.363980110368755e-05, "loss": 1.2964, "step": 1347 }, { "epoch": 0.28, "learning_rate": 3.36298320321197e-05, "loss": 1.333, "step": 1348 }, { "epoch": 0.28, "learning_rate": 3.361985663352048e-05, "loss": 1.3069, "step": 1349 }, { "epoch": 0.28, "learning_rate": 3.360987491252051e-05, "loss": 1.2551, "step": 1350 }, { "epoch": 0.28, "learning_rate": 3.359988687375336e-05, "loss": 1.2971, "step": 1351 }, { "epoch": 0.28, "learning_rate": 3.3589892521855515e-05, "loss": 1.3038, "step": 1352 }, { "epoch": 0.28, "learning_rate": 3.35798918614664e-05, "loss": 1.3001, "step": 1353 }, { "epoch": 0.28, "learning_rate": 3.356988489722837e-05, "loss": 1.3387, "step": 1354 }, { "epoch": 0.29, "learning_rate": 3.355987163378671e-05, "loss": 1.2484, "step": 1355 }, { "epoch": 0.29, "learning_rate": 3.354985207578961e-05, "loss": 1.2616, "step": 1356 }, { "epoch": 0.29, "learning_rate": 3.3539826227888216e-05, "loss": 1.3249, "step": 1357 }, { "epoch": 0.29, "learning_rate": 3.352979409473656e-05, "loss": 1.2968, "step": 1358 }, { "epoch": 0.29, "learning_rate": 3.351975568099159e-05, "loss": 1.3149, "step": 1359 }, { "epoch": 0.29, "learning_rate": 3.350971099131322e-05, "loss": 1.362, "step": 1360 }, { "epoch": 0.29, "learning_rate": 3.349966003036421e-05, "loss": 1.2973, "step": 1361 }, { "epoch": 0.29, "learning_rate": 3.3489602802810276e-05, "loss": 1.2954, "step": 1362 }, { "epoch": 0.29, "learning_rate": 3.347953931332004e-05, "loss": 1.2829, "step": 1363 }, { "epoch": 0.29, "learning_rate": 3.3469469566565e-05, "loss": 1.2816, "step": 1364 }, { "epoch": 0.29, "learning_rate": 3.345939356721959e-05, "loss": 1.3187, "step": 1365 }, { "epoch": 0.29, "learning_rate": 3.3449311319961134e-05, "loss": 1.2707, "step": 1366 }, { "epoch": 0.29, "learning_rate": 3.343922282946985e-05, "loss": 1.3621, "step": 1367 }, { "epoch": 0.29, "learning_rate": 3.342912810042888e-05, "loss": 1.3427, "step": 1368 }, { "epoch": 0.29, "learning_rate": 3.3419027137524236e-05, "loss": 1.271, "step": 1369 }, { "epoch": 0.29, "learning_rate": 3.340891994544483e-05, "loss": 1.2508, "step": 1370 }, { "epoch": 0.29, "learning_rate": 3.339880652888246e-05, "loss": 1.2675, "step": 1371 }, { "epoch": 0.29, "learning_rate": 3.338868689253183e-05, "loss": 1.286, "step": 1372 }, { "epoch": 0.29, "learning_rate": 3.337856104109053e-05, "loss": 1.2766, "step": 1373 }, { "epoch": 0.29, "learning_rate": 3.3368428979259006e-05, "loss": 1.2772, "step": 1374 }, { "epoch": 0.29, "learning_rate": 3.335829071174063e-05, "loss": 1.3164, "step": 1375 }, { "epoch": 0.29, "learning_rate": 3.334814624324163e-05, "loss": 1.2738, "step": 1376 }, { "epoch": 0.29, "learning_rate": 3.333799557847109e-05, "loss": 1.3249, "step": 1377 }, { "epoch": 0.29, "learning_rate": 3.332783872214103e-05, "loss": 1.3157, "step": 1378 }, { "epoch": 0.29, "learning_rate": 3.331767567896629e-05, "loss": 1.2825, "step": 1379 }, { "epoch": 0.29, "learning_rate": 3.330750645366461e-05, "loss": 1.2947, "step": 1380 }, { "epoch": 0.29, "learning_rate": 3.3297331050956576e-05, "loss": 1.3257, "step": 1381 }, { "epoch": 0.29, "learning_rate": 3.328714947556568e-05, "loss": 1.2804, "step": 1382 }, { "epoch": 0.29, "learning_rate": 3.327696173221824e-05, "loss": 1.2964, "step": 1383 }, { "epoch": 0.29, "learning_rate": 3.326676782564347e-05, "loss": 1.3306, "step": 1384 }, { "epoch": 0.29, "learning_rate": 3.325656776057341e-05, "loss": 1.2447, "step": 1385 }, { "epoch": 0.29, "learning_rate": 3.324636154174299e-05, "loss": 1.3169, "step": 1386 }, { "epoch": 0.29, "learning_rate": 3.3236149173889975e-05, "loss": 1.2883, "step": 1387 }, { "epoch": 0.29, "learning_rate": 3.3225930661755005e-05, "loss": 1.3379, "step": 1388 }, { "epoch": 0.29, "learning_rate": 3.321570601008155e-05, "loss": 1.3591, "step": 1389 }, { "epoch": 0.29, "learning_rate": 3.320547522361595e-05, "loss": 1.3079, "step": 1390 }, { "epoch": 0.29, "learning_rate": 3.3195238307107375e-05, "loss": 1.3126, "step": 1391 }, { "epoch": 0.29, "learning_rate": 3.318499526530786e-05, "loss": 1.3291, "step": 1392 }, { "epoch": 0.29, "learning_rate": 3.317474610297226e-05, "loss": 1.3011, "step": 1393 }, { "epoch": 0.29, "learning_rate": 3.316449082485829e-05, "loss": 1.302, "step": 1394 }, { "epoch": 0.29, "learning_rate": 3.31542294357265e-05, "loss": 1.3377, "step": 1395 }, { "epoch": 0.29, "learning_rate": 3.3143961940340274e-05, "loss": 1.2829, "step": 1396 }, { "epoch": 0.29, "learning_rate": 3.313368834346583e-05, "loss": 1.2995, "step": 1397 }, { "epoch": 0.29, "learning_rate": 3.312340864987221e-05, "loss": 1.3037, "step": 1398 }, { "epoch": 0.29, "learning_rate": 3.311312286433131e-05, "loss": 1.3276, "step": 1399 }, { "epoch": 0.29, "learning_rate": 3.310283099161783e-05, "loss": 1.3256, "step": 1400 }, { "epoch": 0.29, "learning_rate": 3.309253303650932e-05, "loss": 1.3359, "step": 1401 }, { "epoch": 0.29, "learning_rate": 3.3082229003786114e-05, "loss": 1.3255, "step": 1402 }, { "epoch": 0.3, "learning_rate": 3.3071918898231413e-05, "loss": 1.2528, "step": 1403 }, { "epoch": 0.3, "learning_rate": 3.3061602724631205e-05, "loss": 1.3548, "step": 1404 }, { "epoch": 0.3, "learning_rate": 3.3051280487774316e-05, "loss": 1.2952, "step": 1405 }, { "epoch": 0.3, "learning_rate": 3.304095219245236e-05, "loss": 1.3009, "step": 1406 }, { "epoch": 0.3, "learning_rate": 3.303061784345979e-05, "loss": 1.3164, "step": 1407 }, { "epoch": 0.3, "learning_rate": 3.302027744559387e-05, "loss": 1.2863, "step": 1408 }, { "epoch": 0.3, "learning_rate": 3.300993100365463e-05, "loss": 1.2838, "step": 1409 }, { "epoch": 0.3, "learning_rate": 3.299957852244496e-05, "loss": 1.3443, "step": 1410 }, { "epoch": 0.3, "learning_rate": 3.298922000677053e-05, "loss": 1.278, "step": 1411 }, { "epoch": 0.3, "learning_rate": 3.2978855461439806e-05, "loss": 1.2888, "step": 1412 }, { "epoch": 0.3, "learning_rate": 3.296848489126406e-05, "loss": 1.2988, "step": 1413 }, { "epoch": 0.3, "learning_rate": 3.295810830105736e-05, "loss": 1.276, "step": 1414 }, { "epoch": 0.3, "learning_rate": 3.294772569563656e-05, "loss": 1.2776, "step": 1415 }, { "epoch": 0.3, "learning_rate": 3.293733707982132e-05, "loss": 1.2864, "step": 1416 }, { "epoch": 0.3, "learning_rate": 3.292694245843407e-05, "loss": 1.2906, "step": 1417 }, { "epoch": 0.3, "learning_rate": 3.2916541836300065e-05, "loss": 1.3245, "step": 1418 }, { "epoch": 0.3, "learning_rate": 3.290613521824731e-05, "loss": 1.3229, "step": 1419 }, { "epoch": 0.3, "learning_rate": 3.28957226091066e-05, "loss": 1.2939, "step": 1420 }, { "epoch": 0.3, "learning_rate": 3.2885304013711525e-05, "loss": 1.2817, "step": 1421 }, { "epoch": 0.3, "learning_rate": 3.2874879436898444e-05, "loss": 1.2686, "step": 1422 }, { "epoch": 0.3, "learning_rate": 3.286444888350649e-05, "loss": 1.3233, "step": 1423 }, { "epoch": 0.3, "learning_rate": 3.285401235837758e-05, "loss": 1.2612, "step": 1424 }, { "epoch": 0.3, "learning_rate": 3.28435698663564e-05, "loss": 1.3166, "step": 1425 }, { "epoch": 0.3, "learning_rate": 3.283312141229039e-05, "loss": 1.3319, "step": 1426 }, { "epoch": 0.3, "learning_rate": 3.282266700102978e-05, "loss": 1.2886, "step": 1427 }, { "epoch": 0.3, "learning_rate": 3.281220663742756e-05, "loss": 1.237, "step": 1428 }, { "epoch": 0.3, "learning_rate": 3.280174032633947e-05, "loss": 1.3069, "step": 1429 }, { "epoch": 0.3, "learning_rate": 3.279126807262403e-05, "loss": 1.2735, "step": 1430 }, { "epoch": 0.3, "learning_rate": 3.27807898811425e-05, "loss": 1.2644, "step": 1431 }, { "epoch": 0.3, "learning_rate": 3.277030575675891e-05, "loss": 1.2884, "step": 1432 }, { "epoch": 0.3, "learning_rate": 3.2759815704340034e-05, "loss": 1.2894, "step": 1433 }, { "epoch": 0.3, "learning_rate": 3.2749319728755415e-05, "loss": 1.2695, "step": 1434 }, { "epoch": 0.3, "learning_rate": 3.273881783487732e-05, "loss": 1.3131, "step": 1435 }, { "epoch": 0.3, "learning_rate": 3.2728310027580786e-05, "loss": 1.3413, "step": 1436 }, { "epoch": 0.3, "learning_rate": 3.271779631174358e-05, "loss": 1.2963, "step": 1437 }, { "epoch": 0.3, "learning_rate": 3.270727669224622e-05, "loss": 1.3176, "step": 1438 }, { "epoch": 0.3, "learning_rate": 3.269675117397196e-05, "loss": 1.3155, "step": 1439 }, { "epoch": 0.3, "learning_rate": 3.268621976180681e-05, "loss": 1.2833, "step": 1440 }, { "epoch": 0.3, "learning_rate": 3.267568246063948e-05, "loss": 1.2208, "step": 1441 }, { "epoch": 0.3, "learning_rate": 3.2665139275361446e-05, "loss": 1.3166, "step": 1442 }, { "epoch": 0.3, "learning_rate": 3.26545902108669e-05, "loss": 1.2933, "step": 1443 }, { "epoch": 0.3, "learning_rate": 3.2644035272052756e-05, "loss": 1.3392, "step": 1444 }, { "epoch": 0.3, "learning_rate": 3.263347446381869e-05, "loss": 1.2914, "step": 1445 }, { "epoch": 0.3, "learning_rate": 3.2622907791067056e-05, "loss": 1.2961, "step": 1446 }, { "epoch": 0.3, "learning_rate": 3.261233525870296e-05, "loss": 1.3064, "step": 1447 }, { "epoch": 0.3, "learning_rate": 3.260175687163423e-05, "loss": 1.3228, "step": 1448 }, { "epoch": 0.3, "learning_rate": 3.259117263477138e-05, "loss": 1.2475, "step": 1449 }, { "epoch": 0.3, "learning_rate": 3.2580582553027684e-05, "loss": 1.2894, "step": 1450 }, { "epoch": 0.31, "learning_rate": 3.2569986631319104e-05, "loss": 1.2327, "step": 1451 }, { "epoch": 0.31, "learning_rate": 3.2559384874564305e-05, "loss": 1.2908, "step": 1452 }, { "epoch": 0.31, "learning_rate": 3.254877728768468e-05, "loss": 1.2182, "step": 1453 }, { "epoch": 0.31, "learning_rate": 3.2538163875604316e-05, "loss": 1.2867, "step": 1454 }, { "epoch": 0.31, "learning_rate": 3.252754464325001e-05, "loss": 1.2964, "step": 1455 }, { "epoch": 0.31, "learning_rate": 3.251691959555126e-05, "loss": 1.299, "step": 1456 }, { "epoch": 0.31, "learning_rate": 3.2506288737440265e-05, "loss": 1.2994, "step": 1457 }, { "epoch": 0.31, "learning_rate": 3.249565207385192e-05, "loss": 1.3188, "step": 1458 }, { "epoch": 0.31, "learning_rate": 3.24850096097238e-05, "loss": 1.2994, "step": 1459 }, { "epoch": 0.31, "learning_rate": 3.2474361349996205e-05, "loss": 1.3603, "step": 1460 }, { "epoch": 0.31, "learning_rate": 3.2463707299612086e-05, "loss": 1.3068, "step": 1461 }, { "epoch": 0.31, "learning_rate": 3.245304746351712e-05, "loss": 1.3158, "step": 1462 }, { "epoch": 0.31, "learning_rate": 3.2442381846659644e-05, "loss": 1.3028, "step": 1463 }, { "epoch": 0.31, "learning_rate": 3.243171045399069e-05, "loss": 1.3344, "step": 1464 }, { "epoch": 0.31, "learning_rate": 3.2421033290463966e-05, "loss": 1.262, "step": 1465 }, { "epoch": 0.31, "learning_rate": 3.241035036103587e-05, "loss": 1.2577, "step": 1466 }, { "epoch": 0.31, "learning_rate": 3.239966167066545e-05, "loss": 1.2835, "step": 1467 }, { "epoch": 0.31, "learning_rate": 3.2388967224314464e-05, "loss": 1.2941, "step": 1468 }, { "epoch": 0.31, "learning_rate": 3.2378267026947314e-05, "loss": 1.2777, "step": 1469 }, { "epoch": 0.31, "learning_rate": 3.2367561083531074e-05, "loss": 1.2969, "step": 1470 }, { "epoch": 0.31, "learning_rate": 3.235684939903551e-05, "loss": 1.3229, "step": 1471 }, { "epoch": 0.31, "learning_rate": 3.234613197843302e-05, "loss": 1.31, "step": 1472 }, { "epoch": 0.31, "learning_rate": 3.233540882669869e-05, "loss": 1.2708, "step": 1473 }, { "epoch": 0.31, "learning_rate": 3.232467994881026e-05, "loss": 1.2625, "step": 1474 }, { "epoch": 0.31, "learning_rate": 3.2313945349748116e-05, "loss": 1.3055, "step": 1475 }, { "epoch": 0.31, "learning_rate": 3.230320503449531e-05, "loss": 1.2882, "step": 1476 }, { "epoch": 0.31, "learning_rate": 3.2292459008037554e-05, "loss": 1.3261, "step": 1477 }, { "epoch": 0.31, "learning_rate": 3.228170727536319e-05, "loss": 1.3127, "step": 1478 }, { "epoch": 0.31, "learning_rate": 3.227094984146323e-05, "loss": 1.2909, "step": 1479 }, { "epoch": 0.31, "learning_rate": 3.226018671133134e-05, "loss": 1.2853, "step": 1480 }, { "epoch": 0.31, "learning_rate": 3.224941788996378e-05, "loss": 1.2684, "step": 1481 }, { "epoch": 0.31, "learning_rate": 3.223864338235951e-05, "loss": 1.3026, "step": 1482 }, { "epoch": 0.31, "learning_rate": 3.2227863193520115e-05, "loss": 1.3298, "step": 1483 }, { "epoch": 0.31, "learning_rate": 3.221707732844979e-05, "loss": 1.2929, "step": 1484 }, { "epoch": 0.31, "learning_rate": 3.220628579215539e-05, "loss": 1.3084, "step": 1485 }, { "epoch": 0.31, "learning_rate": 3.21954885896464e-05, "loss": 1.3481, "step": 1486 }, { "epoch": 0.31, "learning_rate": 3.2184685725934926e-05, "loss": 1.286, "step": 1487 }, { "epoch": 0.31, "learning_rate": 3.2173877206035714e-05, "loss": 1.3556, "step": 1488 }, { "epoch": 0.31, "learning_rate": 3.2163063034966126e-05, "loss": 1.2548, "step": 1489 }, { "epoch": 0.31, "learning_rate": 3.215224321774614e-05, "loss": 1.2961, "step": 1490 }, { "epoch": 0.31, "learning_rate": 3.214141775939839e-05, "loss": 1.2892, "step": 1491 }, { "epoch": 0.31, "learning_rate": 3.213058666494807e-05, "loss": 1.2803, "step": 1492 }, { "epoch": 0.31, "learning_rate": 3.2119749939423063e-05, "loss": 1.3108, "step": 1493 }, { "epoch": 0.31, "learning_rate": 3.2108907587853794e-05, "loss": 1.3067, "step": 1494 }, { "epoch": 0.31, "learning_rate": 3.2098059615273354e-05, "loss": 1.2733, "step": 1495 }, { "epoch": 0.31, "learning_rate": 3.2087206026717415e-05, "loss": 1.2772, "step": 1496 }, { "epoch": 0.31, "learning_rate": 3.207634682722427e-05, "loss": 1.2983, "step": 1497 }, { "epoch": 0.32, "learning_rate": 3.206548202183479e-05, "loss": 1.297, "step": 1498 }, { "epoch": 0.32, "learning_rate": 3.20546116155925e-05, "loss": 1.2824, "step": 1499 }, { "epoch": 0.32, "learning_rate": 3.204373561354345e-05, "loss": 1.2834, "step": 1500 }, { "epoch": 0.32, "learning_rate": 3.203285402073637e-05, "loss": 1.3461, "step": 1501 }, { "epoch": 0.32, "learning_rate": 3.202196684222253e-05, "loss": 1.2753, "step": 1502 }, { "epoch": 0.32, "learning_rate": 3.2011074083055814e-05, "loss": 1.283, "step": 1503 }, { "epoch": 0.32, "learning_rate": 3.200017574829268e-05, "loss": 1.3074, "step": 1504 }, { "epoch": 0.32, "learning_rate": 3.19892718429922e-05, "loss": 1.2996, "step": 1505 }, { "epoch": 0.32, "learning_rate": 3.1978362372216006e-05, "loss": 1.3166, "step": 1506 }, { "epoch": 0.32, "learning_rate": 3.196744734102833e-05, "loss": 1.3329, "step": 1507 }, { "epoch": 0.32, "learning_rate": 3.195652675449599e-05, "loss": 1.2842, "step": 1508 }, { "epoch": 0.32, "learning_rate": 3.194560061768835e-05, "loss": 1.314, "step": 1509 }, { "epoch": 0.32, "learning_rate": 3.193466893567739e-05, "loss": 1.2629, "step": 1510 }, { "epoch": 0.32, "learning_rate": 3.192373171353765e-05, "loss": 1.3123, "step": 1511 }, { "epoch": 0.32, "learning_rate": 3.1912788956346226e-05, "loss": 1.2651, "step": 1512 }, { "epoch": 0.32, "learning_rate": 3.190184066918281e-05, "loss": 1.2869, "step": 1513 }, { "epoch": 0.32, "learning_rate": 3.189088685712964e-05, "loss": 1.3235, "step": 1514 }, { "epoch": 0.32, "learning_rate": 3.187992752527153e-05, "loss": 1.2989, "step": 1515 }, { "epoch": 0.32, "learning_rate": 3.186896267869585e-05, "loss": 1.3157, "step": 1516 }, { "epoch": 0.32, "learning_rate": 3.1857992322492525e-05, "loss": 1.308, "step": 1517 }, { "epoch": 0.32, "learning_rate": 3.184701646175407e-05, "loss": 1.2973, "step": 1518 }, { "epoch": 0.32, "learning_rate": 3.183603510157551e-05, "loss": 1.3628, "step": 1519 }, { "epoch": 0.32, "learning_rate": 3.1825048247054444e-05, "loss": 1.3117, "step": 1520 }, { "epoch": 0.32, "learning_rate": 3.181405590329102e-05, "loss": 1.2541, "step": 1521 }, { "epoch": 0.32, "learning_rate": 3.180305807538796e-05, "loss": 1.3173, "step": 1522 }, { "epoch": 0.32, "learning_rate": 3.1792054768450466e-05, "loss": 1.267, "step": 1523 }, { "epoch": 0.32, "learning_rate": 3.178104598758636e-05, "loss": 1.3344, "step": 1524 }, { "epoch": 0.32, "learning_rate": 3.1770031737905946e-05, "loss": 1.3157, "step": 1525 }, { "epoch": 0.32, "learning_rate": 3.1759012024522103e-05, "loss": 1.2994, "step": 1526 }, { "epoch": 0.32, "learning_rate": 3.1747986852550225e-05, "loss": 1.3523, "step": 1527 }, { "epoch": 0.32, "learning_rate": 3.173695622710826e-05, "loss": 1.3186, "step": 1528 }, { "epoch": 0.32, "learning_rate": 3.172592015331666e-05, "loss": 1.2854, "step": 1529 }, { "epoch": 0.32, "learning_rate": 3.171487863629843e-05, "loss": 1.2563, "step": 1530 }, { "epoch": 0.32, "learning_rate": 3.1703831681179096e-05, "loss": 1.3092, "step": 1531 }, { "epoch": 0.32, "learning_rate": 3.169277929308669e-05, "loss": 1.3017, "step": 1532 }, { "epoch": 0.32, "learning_rate": 3.168172147715181e-05, "loss": 1.299, "step": 1533 }, { "epoch": 0.32, "learning_rate": 3.1670658238507524e-05, "loss": 1.3178, "step": 1534 }, { "epoch": 0.32, "learning_rate": 3.1659589582289446e-05, "loss": 1.2963, "step": 1535 }, { "epoch": 0.32, "learning_rate": 3.16485155136357e-05, "loss": 1.3274, "step": 1536 }, { "epoch": 0.32, "learning_rate": 3.16374360376869e-05, "loss": 1.2938, "step": 1537 }, { "epoch": 0.32, "learning_rate": 3.1626351159586224e-05, "loss": 1.2672, "step": 1538 }, { "epoch": 0.32, "learning_rate": 3.1615260884479304e-05, "loss": 1.3287, "step": 1539 }, { "epoch": 0.32, "learning_rate": 3.1604165217514296e-05, "loss": 1.3223, "step": 1540 }, { "epoch": 0.32, "learning_rate": 3.159306416384187e-05, "loss": 1.2831, "step": 1541 }, { "epoch": 0.32, "learning_rate": 3.158195772861517e-05, "loss": 1.3246, "step": 1542 }, { "epoch": 0.32, "learning_rate": 3.1570845916989875e-05, "loss": 1.3066, "step": 1543 }, { "epoch": 0.32, "learning_rate": 3.1559728734124125e-05, "loss": 1.3554, "step": 1544 }, { "epoch": 0.32, "learning_rate": 3.154860618517858e-05, "loss": 1.2883, "step": 1545 }, { "epoch": 0.33, "learning_rate": 3.1537478275316364e-05, "loss": 1.2856, "step": 1546 }, { "epoch": 0.33, "learning_rate": 3.152634500970312e-05, "loss": 1.3114, "step": 1547 }, { "epoch": 0.33, "learning_rate": 3.151520639350695e-05, "loss": 1.3208, "step": 1548 }, { "epoch": 0.33, "learning_rate": 3.150406243189846e-05, "loss": 1.3286, "step": 1549 }, { "epoch": 0.33, "learning_rate": 3.1492913130050715e-05, "loss": 1.3066, "step": 1550 }, { "epoch": 0.33, "learning_rate": 3.1481758493139295e-05, "loss": 1.3445, "step": 1551 }, { "epoch": 0.33, "learning_rate": 3.147059852634221e-05, "loss": 1.3027, "step": 1552 }, { "epoch": 0.33, "learning_rate": 3.145943323483999e-05, "loss": 1.3306, "step": 1553 }, { "epoch": 0.33, "learning_rate": 3.144826262381559e-05, "loss": 1.3253, "step": 1554 }, { "epoch": 0.33, "learning_rate": 3.143708669845449e-05, "loss": 1.2997, "step": 1555 }, { "epoch": 0.33, "learning_rate": 3.142590546394459e-05, "loss": 1.2951, "step": 1556 }, { "epoch": 0.33, "learning_rate": 3.141471892547627e-05, "loss": 1.3165, "step": 1557 }, { "epoch": 0.33, "learning_rate": 3.140352708824237e-05, "loss": 1.2899, "step": 1558 }, { "epoch": 0.33, "learning_rate": 3.1392329957438203e-05, "loss": 1.3088, "step": 1559 }, { "epoch": 0.33, "learning_rate": 3.1381127538261524e-05, "loss": 1.298, "step": 1560 }, { "epoch": 0.33, "learning_rate": 3.136991983591255e-05, "loss": 1.2857, "step": 1561 }, { "epoch": 0.33, "learning_rate": 3.1358706855593935e-05, "loss": 1.2902, "step": 1562 }, { "epoch": 0.33, "learning_rate": 3.1347488602510824e-05, "loss": 1.3583, "step": 1563 }, { "epoch": 0.33, "learning_rate": 3.133626508187076e-05, "loss": 1.3131, "step": 1564 }, { "epoch": 0.33, "learning_rate": 3.132503629888376e-05, "loss": 1.2533, "step": 1565 }, { "epoch": 0.33, "learning_rate": 3.131380225876228e-05, "loss": 1.2924, "step": 1566 }, { "epoch": 0.33, "learning_rate": 3.130256296672121e-05, "loss": 1.3152, "step": 1567 }, { "epoch": 0.33, "learning_rate": 3.1291318427977876e-05, "loss": 1.2727, "step": 1568 }, { "epoch": 0.33, "learning_rate": 3.1280068647752066e-05, "loss": 1.3052, "step": 1569 }, { "epoch": 0.33, "learning_rate": 3.126881363126595e-05, "loss": 1.3038, "step": 1570 }, { "epoch": 0.33, "learning_rate": 3.1257553383744186e-05, "loss": 1.3172, "step": 1571 }, { "epoch": 0.33, "learning_rate": 3.1246287910413824e-05, "loss": 1.2669, "step": 1572 }, { "epoch": 0.33, "learning_rate": 3.123501721650434e-05, "loss": 1.2939, "step": 1573 }, { "epoch": 0.33, "learning_rate": 3.122374130724765e-05, "loss": 1.302, "step": 1574 }, { "epoch": 0.33, "learning_rate": 3.1212460187878085e-05, "loss": 1.2917, "step": 1575 }, { "epoch": 0.33, "learning_rate": 3.1201173863632396e-05, "loss": 1.3215, "step": 1576 }, { "epoch": 0.33, "learning_rate": 3.1189882339749735e-05, "loss": 1.281, "step": 1577 }, { "epoch": 0.33, "learning_rate": 3.117858562147169e-05, "loss": 1.3307, "step": 1578 }, { "epoch": 0.33, "learning_rate": 3.116728371404225e-05, "loss": 1.2713, "step": 1579 }, { "epoch": 0.33, "learning_rate": 3.115597662270781e-05, "loss": 1.2681, "step": 1580 }, { "epoch": 0.33, "learning_rate": 3.114466435271717e-05, "loss": 1.2676, "step": 1581 }, { "epoch": 0.33, "learning_rate": 3.113334690932155e-05, "loss": 1.2849, "step": 1582 }, { "epoch": 0.33, "learning_rate": 3.1122024297774545e-05, "loss": 1.3106, "step": 1583 }, { "epoch": 0.33, "learning_rate": 3.111069652333219e-05, "loss": 1.3076, "step": 1584 }, { "epoch": 0.33, "learning_rate": 3.1099363591252844e-05, "loss": 1.2546, "step": 1585 }, { "epoch": 0.33, "learning_rate": 3.1088025506797356e-05, "loss": 1.3116, "step": 1586 }, { "epoch": 0.33, "learning_rate": 3.107668227522889e-05, "loss": 1.3047, "step": 1587 }, { "epoch": 0.33, "learning_rate": 3.106533390181304e-05, "loss": 1.3274, "step": 1588 }, { "epoch": 0.33, "learning_rate": 3.105398039181775e-05, "loss": 1.2934, "step": 1589 }, { "epoch": 0.33, "learning_rate": 3.1042621750513405e-05, "loss": 1.2906, "step": 1590 }, { "epoch": 0.33, "learning_rate": 3.103125798317272e-05, "loss": 1.26, "step": 1591 }, { "epoch": 0.33, "learning_rate": 3.101988909507081e-05, "loss": 1.2634, "step": 1592 }, { "epoch": 0.34, "learning_rate": 3.100851509148517e-05, "loss": 1.2759, "step": 1593 }, { "epoch": 0.34, "learning_rate": 3.099713597769566e-05, "loss": 1.3192, "step": 1594 }, { "epoch": 0.34, "learning_rate": 3.098575175898452e-05, "loss": 1.2767, "step": 1595 }, { "epoch": 0.34, "learning_rate": 3.097436244063636e-05, "loss": 1.2558, "step": 1596 }, { "epoch": 0.34, "learning_rate": 3.0962968027938156e-05, "loss": 1.3523, "step": 1597 }, { "epoch": 0.34, "learning_rate": 3.0951568526179235e-05, "loss": 1.3043, "step": 1598 }, { "epoch": 0.34, "learning_rate": 3.094016394065131e-05, "loss": 1.3225, "step": 1599 }, { "epoch": 0.34, "learning_rate": 3.0928754276648443e-05, "loss": 1.3094, "step": 1600 }, { "epoch": 0.34, "learning_rate": 3.091733953946705e-05, "loss": 1.2396, "step": 1601 }, { "epoch": 0.34, "learning_rate": 3.09059197344059e-05, "loss": 1.2891, "step": 1602 }, { "epoch": 0.34, "learning_rate": 3.089449486676613e-05, "loss": 1.3109, "step": 1603 }, { "epoch": 0.34, "learning_rate": 3.08830649418512e-05, "loss": 1.3257, "step": 1604 }, { "epoch": 0.34, "learning_rate": 3.087162996496696e-05, "loss": 1.3005, "step": 1605 }, { "epoch": 0.34, "learning_rate": 3.086018994142156e-05, "loss": 1.3104, "step": 1606 }, { "epoch": 0.34, "learning_rate": 3.084874487652551e-05, "loss": 1.3131, "step": 1607 }, { "epoch": 0.34, "learning_rate": 3.0837294775591675e-05, "loss": 1.3353, "step": 1608 }, { "epoch": 0.34, "learning_rate": 3.082583964393524e-05, "loss": 1.3006, "step": 1609 }, { "epoch": 0.34, "learning_rate": 3.081437948687373e-05, "loss": 1.3282, "step": 1610 }, { "epoch": 0.34, "learning_rate": 3.0802914309727004e-05, "loss": 1.3562, "step": 1611 }, { "epoch": 0.34, "learning_rate": 3.0791444117817247e-05, "loss": 1.2589, "step": 1612 }, { "epoch": 0.34, "learning_rate": 3.0779968916468974e-05, "loss": 1.3137, "step": 1613 }, { "epoch": 0.34, "learning_rate": 3.076848871100904e-05, "loss": 1.3063, "step": 1614 }, { "epoch": 0.34, "learning_rate": 3.075700350676659e-05, "loss": 1.3101, "step": 1615 }, { "epoch": 0.34, "learning_rate": 3.074551330907312e-05, "loss": 1.3169, "step": 1616 }, { "epoch": 0.34, "learning_rate": 3.073401812326244e-05, "loss": 1.2861, "step": 1617 }, { "epoch": 0.34, "learning_rate": 3.072251795467065e-05, "loss": 1.306, "step": 1618 }, { "epoch": 0.34, "learning_rate": 3.071101280863621e-05, "loss": 1.2439, "step": 1619 }, { "epoch": 0.34, "learning_rate": 3.069950269049983e-05, "loss": 1.289, "step": 1620 }, { "epoch": 0.34, "learning_rate": 3.068798760560458e-05, "loss": 1.3182, "step": 1621 }, { "epoch": 0.34, "learning_rate": 3.067646755929582e-05, "loss": 1.2638, "step": 1622 }, { "epoch": 0.34, "learning_rate": 3.066494255692119e-05, "loss": 1.2697, "step": 1623 }, { "epoch": 0.34, "learning_rate": 3.0653412603830665e-05, "loss": 1.3101, "step": 1624 }, { "epoch": 0.34, "learning_rate": 3.06418777053765e-05, "loss": 1.3043, "step": 1625 }, { "epoch": 0.34, "learning_rate": 3.0630337866913236e-05, "loss": 1.2993, "step": 1626 }, { "epoch": 0.34, "learning_rate": 3.061879309379774e-05, "loss": 1.2612, "step": 1627 }, { "epoch": 0.34, "learning_rate": 3.060724339138913e-05, "loss": 1.3094, "step": 1628 }, { "epoch": 0.34, "learning_rate": 3.0595688765048855e-05, "loss": 1.2825, "step": 1629 }, { "epoch": 0.34, "learning_rate": 3.058412922014061e-05, "loss": 1.353, "step": 1630 }, { "epoch": 0.34, "learning_rate": 3.057256476203038e-05, "loss": 1.2747, "step": 1631 }, { "epoch": 0.34, "learning_rate": 3.056099539608646e-05, "loss": 1.3117, "step": 1632 }, { "epoch": 0.34, "learning_rate": 3.0549421127679395e-05, "loss": 1.3436, "step": 1633 }, { "epoch": 0.34, "learning_rate": 3.053784196218201e-05, "loss": 1.3072, "step": 1634 }, { "epoch": 0.34, "learning_rate": 3.052625790496942e-05, "loss": 1.3141, "step": 1635 }, { "epoch": 0.34, "learning_rate": 3.0514668961418984e-05, "loss": 1.2224, "step": 1636 }, { "epoch": 0.34, "learning_rate": 3.050307513691035e-05, "loss": 1.2675, "step": 1637 }, { "epoch": 0.34, "learning_rate": 3.0491476436825427e-05, "loss": 1.3372, "step": 1638 }, { "epoch": 0.34, "learning_rate": 3.047987286654838e-05, "loss": 1.2686, "step": 1639 }, { "epoch": 0.34, "learning_rate": 3.0468264431465643e-05, "loss": 1.3408, "step": 1640 }, { "epoch": 0.35, "learning_rate": 3.045665113696591e-05, "loss": 1.3472, "step": 1641 }, { "epoch": 0.35, "learning_rate": 3.0445032988440126e-05, "loss": 1.301, "step": 1642 }, { "epoch": 0.35, "learning_rate": 3.0433409991281483e-05, "loss": 1.2907, "step": 1643 }, { "epoch": 0.35, "learning_rate": 3.042178215088543e-05, "loss": 1.2662, "step": 1644 }, { "epoch": 0.35, "learning_rate": 3.041014947264967e-05, "loss": 1.239, "step": 1645 }, { "epoch": 0.35, "learning_rate": 3.0398511961974143e-05, "loss": 1.2055, "step": 1646 }, { "epoch": 0.35, "learning_rate": 3.0386869624261036e-05, "loss": 1.2929, "step": 1647 }, { "epoch": 0.35, "learning_rate": 3.0375222464914782e-05, "loss": 1.2832, "step": 1648 }, { "epoch": 0.35, "learning_rate": 3.0363570489342033e-05, "loss": 1.2852, "step": 1649 }, { "epoch": 0.35, "learning_rate": 3.03519137029517e-05, "loss": 1.2865, "step": 1650 }, { "epoch": 0.35, "learning_rate": 3.034025211115492e-05, "loss": 1.2482, "step": 1651 }, { "epoch": 0.35, "learning_rate": 3.0328585719365057e-05, "loss": 1.2963, "step": 1652 }, { "epoch": 0.35, "learning_rate": 3.0316914532997694e-05, "loss": 1.2997, "step": 1653 }, { "epoch": 0.35, "learning_rate": 3.030523855747066e-05, "loss": 1.3042, "step": 1654 }, { "epoch": 0.35, "learning_rate": 3.0293557798203998e-05, "loss": 1.3074, "step": 1655 }, { "epoch": 0.35, "learning_rate": 3.0281872260619965e-05, "loss": 1.3307, "step": 1656 }, { "epoch": 0.35, "learning_rate": 3.0270181950143045e-05, "loss": 1.2965, "step": 1657 }, { "epoch": 0.35, "learning_rate": 3.025848687219993e-05, "loss": 1.3051, "step": 1658 }, { "epoch": 0.35, "learning_rate": 3.0246787032219535e-05, "loss": 1.344, "step": 1659 }, { "epoch": 0.35, "learning_rate": 3.0235082435632984e-05, "loss": 1.3226, "step": 1660 }, { "epoch": 0.35, "learning_rate": 3.022337308787359e-05, "loss": 1.2981, "step": 1661 }, { "epoch": 0.35, "learning_rate": 3.02116589943769e-05, "loss": 1.2949, "step": 1662 }, { "epoch": 0.35, "learning_rate": 3.019994016058064e-05, "loss": 1.2734, "step": 1663 }, { "epoch": 0.35, "learning_rate": 3.018821659192476e-05, "loss": 1.2922, "step": 1664 }, { "epoch": 0.35, "learning_rate": 3.0176488293851388e-05, "loss": 1.2911, "step": 1665 }, { "epoch": 0.35, "learning_rate": 3.0164755271804856e-05, "loss": 1.2955, "step": 1666 }, { "epoch": 0.35, "learning_rate": 3.015301753123169e-05, "loss": 1.327, "step": 1667 }, { "epoch": 0.35, "learning_rate": 3.0141275077580592e-05, "loss": 1.2304, "step": 1668 }, { "epoch": 0.35, "learning_rate": 3.0129527916302482e-05, "loss": 1.3248, "step": 1669 }, { "epoch": 0.35, "learning_rate": 3.0117776052850427e-05, "loss": 1.2967, "step": 1670 }, { "epoch": 0.35, "learning_rate": 3.0106019492679714e-05, "loss": 1.2503, "step": 1671 }, { "epoch": 0.35, "learning_rate": 3.009425824124778e-05, "loss": 1.2552, "step": 1672 }, { "epoch": 0.35, "learning_rate": 3.008249230401426e-05, "loss": 1.2909, "step": 1673 }, { "epoch": 0.35, "learning_rate": 3.0070721686440953e-05, "loss": 1.3333, "step": 1674 }, { "epoch": 0.35, "learning_rate": 3.0058946393991833e-05, "loss": 1.2823, "step": 1675 }, { "epoch": 0.35, "learning_rate": 3.004716643213305e-05, "loss": 1.2726, "step": 1676 }, { "epoch": 0.35, "learning_rate": 3.003538180633292e-05, "loss": 1.3186, "step": 1677 }, { "epoch": 0.35, "learning_rate": 3.0023592522061916e-05, "loss": 1.2841, "step": 1678 }, { "epoch": 0.35, "learning_rate": 3.0011798584792672e-05, "loss": 1.3068, "step": 1679 }, { "epoch": 0.35, "learning_rate": 3.0000000000000004e-05, "loss": 1.2998, "step": 1680 }, { "epoch": 0.35, "learning_rate": 2.9988196773160857e-05, "loss": 1.3281, "step": 1681 }, { "epoch": 0.35, "learning_rate": 2.9976388909754348e-05, "loss": 1.2653, "step": 1682 }, { "epoch": 0.35, "learning_rate": 2.996457641526174e-05, "loss": 1.2684, "step": 1683 }, { "epoch": 0.35, "learning_rate": 2.995275929516646e-05, "loss": 1.3073, "step": 1684 }, { "epoch": 0.35, "learning_rate": 2.9940937554954053e-05, "loss": 1.287, "step": 1685 }, { "epoch": 0.35, "learning_rate": 2.9929111200112233e-05, "loss": 1.2919, "step": 1686 }, { "epoch": 0.35, "learning_rate": 2.991728023613085e-05, "loss": 1.2492, "step": 1687 }, { "epoch": 0.36, "learning_rate": 2.990544466850189e-05, "loss": 1.2469, "step": 1688 }, { "epoch": 0.36, "learning_rate": 2.9893604502719474e-05, "loss": 1.3062, "step": 1689 }, { "epoch": 0.36, "learning_rate": 2.9881759744279875e-05, "loss": 1.326, "step": 1690 }, { "epoch": 0.36, "learning_rate": 2.986991039868148e-05, "loss": 1.2793, "step": 1691 }, { "epoch": 0.36, "learning_rate": 2.9858056471424804e-05, "loss": 1.3325, "step": 1692 }, { "epoch": 0.36, "learning_rate": 2.98461979680125e-05, "loss": 1.306, "step": 1693 }, { "epoch": 0.36, "learning_rate": 2.983433489394934e-05, "loss": 1.334, "step": 1694 }, { "epoch": 0.36, "learning_rate": 2.9822467254742212e-05, "loss": 1.3122, "step": 1695 }, { "epoch": 0.36, "learning_rate": 2.9810595055900148e-05, "loss": 1.2584, "step": 1696 }, { "epoch": 0.36, "learning_rate": 2.9798718302934255e-05, "loss": 1.3124, "step": 1697 }, { "epoch": 0.36, "learning_rate": 2.9786837001357782e-05, "loss": 1.2975, "step": 1698 }, { "epoch": 0.36, "learning_rate": 2.9774951156686094e-05, "loss": 1.3272, "step": 1699 }, { "epoch": 0.36, "learning_rate": 2.976306077443665e-05, "loss": 1.2671, "step": 1700 }, { "epoch": 0.36, "learning_rate": 2.9751165860129024e-05, "loss": 1.305, "step": 1701 }, { "epoch": 0.36, "learning_rate": 2.973926641928489e-05, "loss": 1.3157, "step": 1702 }, { "epoch": 0.36, "learning_rate": 2.9727362457428012e-05, "loss": 1.289, "step": 1703 }, { "epoch": 0.36, "learning_rate": 2.971545398008428e-05, "loss": 1.265, "step": 1704 }, { "epoch": 0.36, "learning_rate": 2.970354099278166e-05, "loss": 1.2552, "step": 1705 }, { "epoch": 0.36, "learning_rate": 2.9691623501050212e-05, "loss": 1.2669, "step": 1706 }, { "epoch": 0.36, "learning_rate": 2.967970151042209e-05, "loss": 1.3045, "step": 1707 }, { "epoch": 0.36, "learning_rate": 2.9667775026431544e-05, "loss": 1.323, "step": 1708 }, { "epoch": 0.36, "learning_rate": 2.96558440546149e-05, "loss": 1.2951, "step": 1709 }, { "epoch": 0.36, "learning_rate": 2.9643908600510572e-05, "loss": 1.3034, "step": 1710 }, { "epoch": 0.36, "learning_rate": 2.9631968669659047e-05, "loss": 1.3261, "step": 1711 }, { "epoch": 0.36, "learning_rate": 2.9620024267602906e-05, "loss": 1.2854, "step": 1712 }, { "epoch": 0.36, "learning_rate": 2.9608075399886792e-05, "loss": 1.3118, "step": 1713 }, { "epoch": 0.36, "learning_rate": 2.9596122072057424e-05, "loss": 1.2566, "step": 1714 }, { "epoch": 0.36, "learning_rate": 2.958416428966359e-05, "loss": 1.2837, "step": 1715 }, { "epoch": 0.36, "learning_rate": 2.957220205825615e-05, "loss": 1.2366, "step": 1716 }, { "epoch": 0.36, "learning_rate": 2.956023538338803e-05, "loss": 1.3016, "step": 1717 }, { "epoch": 0.36, "learning_rate": 2.9548264270614217e-05, "loss": 1.265, "step": 1718 }, { "epoch": 0.36, "learning_rate": 2.953628872549175e-05, "loss": 1.2633, "step": 1719 }, { "epoch": 0.36, "learning_rate": 2.952430875357974e-05, "loss": 1.2747, "step": 1720 }, { "epoch": 0.36, "learning_rate": 2.9512324360439347e-05, "loss": 1.279, "step": 1721 }, { "epoch": 0.36, "learning_rate": 2.9500335551633773e-05, "loss": 1.3096, "step": 1722 }, { "epoch": 0.36, "learning_rate": 2.9488342332728276e-05, "loss": 1.3025, "step": 1723 }, { "epoch": 0.36, "learning_rate": 2.9476344709290175e-05, "loss": 1.2609, "step": 1724 }, { "epoch": 0.36, "learning_rate": 2.9464342686888826e-05, "loss": 1.2991, "step": 1725 }, { "epoch": 0.36, "learning_rate": 2.9452336271095613e-05, "loss": 1.2917, "step": 1726 }, { "epoch": 0.36, "learning_rate": 2.9440325467483974e-05, "loss": 1.3211, "step": 1727 }, { "epoch": 0.36, "learning_rate": 2.942831028162938e-05, "loss": 1.3183, "step": 1728 }, { "epoch": 0.36, "learning_rate": 2.9416290719109333e-05, "loss": 1.3264, "step": 1729 }, { "epoch": 0.36, "learning_rate": 2.9404266785503376e-05, "loss": 1.2962, "step": 1730 }, { "epoch": 0.36, "learning_rate": 2.9392238486393068e-05, "loss": 1.28, "step": 1731 }, { "epoch": 0.36, "learning_rate": 2.9380205827362007e-05, "loss": 1.2701, "step": 1732 }, { "epoch": 0.36, "learning_rate": 2.9368168813995806e-05, "loss": 1.2984, "step": 1733 }, { "epoch": 0.36, "learning_rate": 2.9356127451882105e-05, "loss": 1.2856, "step": 1734 }, { "epoch": 0.36, "learning_rate": 2.934408174661055e-05, "loss": 1.264, "step": 1735 }, { "epoch": 0.37, "learning_rate": 2.933203170377283e-05, "loss": 1.2299, "step": 1736 }, { "epoch": 0.37, "learning_rate": 2.931997732896262e-05, "loss": 1.2662, "step": 1737 }, { "epoch": 0.37, "learning_rate": 2.9307918627775627e-05, "loss": 1.2906, "step": 1738 }, { "epoch": 0.37, "learning_rate": 2.9295855605809543e-05, "loss": 1.2641, "step": 1739 }, { "epoch": 0.37, "learning_rate": 2.9283788268664085e-05, "loss": 1.3194, "step": 1740 }, { "epoch": 0.37, "learning_rate": 2.9271716621940965e-05, "loss": 1.3055, "step": 1741 }, { "epoch": 0.37, "learning_rate": 2.9259640671243903e-05, "loss": 1.3295, "step": 1742 }, { "epoch": 0.37, "learning_rate": 2.9247560422178604e-05, "loss": 1.3002, "step": 1743 }, { "epoch": 0.37, "learning_rate": 2.923547588035278e-05, "loss": 1.2909, "step": 1744 }, { "epoch": 0.37, "learning_rate": 2.9223387051376133e-05, "loss": 1.3057, "step": 1745 }, { "epoch": 0.37, "learning_rate": 2.921129394086035e-05, "loss": 1.3612, "step": 1746 }, { "epoch": 0.37, "learning_rate": 2.919919655441911e-05, "loss": 1.2866, "step": 1747 }, { "epoch": 0.37, "learning_rate": 2.9187094897668076e-05, "loss": 1.277, "step": 1748 }, { "epoch": 0.37, "learning_rate": 2.9174988976224897e-05, "loss": 1.241, "step": 1749 }, { "epoch": 0.37, "learning_rate": 2.916287879570919e-05, "loss": 1.2827, "step": 1750 }, { "epoch": 0.37, "learning_rate": 2.9150764361742564e-05, "loss": 1.3067, "step": 1751 }, { "epoch": 0.37, "learning_rate": 2.9138645679948596e-05, "loss": 1.3002, "step": 1752 }, { "epoch": 0.37, "learning_rate": 2.912652275595283e-05, "loss": 1.3055, "step": 1753 }, { "epoch": 0.37, "learning_rate": 2.9114395595382786e-05, "loss": 1.2862, "step": 1754 }, { "epoch": 0.37, "learning_rate": 2.9102264203867948e-05, "loss": 1.284, "step": 1755 }, { "epoch": 0.37, "learning_rate": 2.9090128587039763e-05, "loss": 1.3091, "step": 1756 }, { "epoch": 0.37, "learning_rate": 2.9077988750531645e-05, "loss": 1.2661, "step": 1757 }, { "epoch": 0.37, "learning_rate": 2.9065844699978957e-05, "loss": 1.3209, "step": 1758 }, { "epoch": 0.37, "learning_rate": 2.9053696441019022e-05, "loss": 1.2794, "step": 1759 }, { "epoch": 0.37, "learning_rate": 2.9041543979291125e-05, "loss": 1.2528, "step": 1760 }, { "epoch": 0.37, "learning_rate": 2.90293873204365e-05, "loss": 1.2654, "step": 1761 }, { "epoch": 0.37, "learning_rate": 2.9017226470098307e-05, "loss": 1.2904, "step": 1762 }, { "epoch": 0.37, "learning_rate": 2.9005061433921685e-05, "loss": 1.2665, "step": 1763 }, { "epoch": 0.37, "learning_rate": 2.8992892217553693e-05, "loss": 1.3105, "step": 1764 }, { "epoch": 0.37, "learning_rate": 2.898071882664334e-05, "loss": 1.2745, "step": 1765 }, { "epoch": 0.37, "learning_rate": 2.896854126684157e-05, "loss": 1.2836, "step": 1766 }, { "epoch": 0.37, "learning_rate": 2.895635954380127e-05, "loss": 1.2484, "step": 1767 }, { "epoch": 0.37, "learning_rate": 2.894417366317724e-05, "loss": 1.2963, "step": 1768 }, { "epoch": 0.37, "learning_rate": 2.8931983630626218e-05, "loss": 1.2534, "step": 1769 }, { "epoch": 0.37, "learning_rate": 2.8919789451806893e-05, "loss": 1.285, "step": 1770 }, { "epoch": 0.37, "learning_rate": 2.890759113237985e-05, "loss": 1.2559, "step": 1771 }, { "epoch": 0.37, "learning_rate": 2.8895388678007602e-05, "loss": 1.3274, "step": 1772 }, { "epoch": 0.37, "learning_rate": 2.8883182094354594e-05, "loss": 1.1933, "step": 1773 }, { "epoch": 0.37, "learning_rate": 2.887097138708717e-05, "loss": 1.266, "step": 1774 }, { "epoch": 0.37, "learning_rate": 2.8858756561873605e-05, "loss": 1.2943, "step": 1775 }, { "epoch": 0.37, "learning_rate": 2.884653762438407e-05, "loss": 1.3024, "step": 1776 }, { "epoch": 0.37, "learning_rate": 2.8834314580290655e-05, "loss": 1.2913, "step": 1777 }, { "epoch": 0.37, "learning_rate": 2.882208743526736e-05, "loss": 1.2376, "step": 1778 }, { "epoch": 0.37, "learning_rate": 2.8809856194990067e-05, "loss": 1.3143, "step": 1779 }, { "epoch": 0.37, "learning_rate": 2.8797620865136594e-05, "loss": 1.2916, "step": 1780 }, { "epoch": 0.37, "learning_rate": 2.8785381451386628e-05, "loss": 1.3028, "step": 1781 }, { "epoch": 0.37, "learning_rate": 2.877313795942176e-05, "loss": 1.2968, "step": 1782 }, { "epoch": 0.38, "learning_rate": 2.8760890394925477e-05, "loss": 1.2332, "step": 1783 }, { "epoch": 0.38, "learning_rate": 2.8748638763583158e-05, "loss": 1.3258, "step": 1784 }, { "epoch": 0.38, "learning_rate": 2.8736383071082065e-05, "loss": 1.3252, "step": 1785 }, { "epoch": 0.38, "learning_rate": 2.872412332311135e-05, "loss": 1.2971, "step": 1786 }, { "epoch": 0.38, "learning_rate": 2.8711859525362045e-05, "loss": 1.2805, "step": 1787 }, { "epoch": 0.38, "learning_rate": 2.8699591683527058e-05, "loss": 1.3081, "step": 1788 }, { "epoch": 0.38, "learning_rate": 2.8687319803301183e-05, "loss": 1.2288, "step": 1789 }, { "epoch": 0.38, "learning_rate": 2.867504389038108e-05, "loss": 1.2787, "step": 1790 }, { "epoch": 0.38, "learning_rate": 2.8662763950465284e-05, "loss": 1.3432, "step": 1791 }, { "epoch": 0.38, "learning_rate": 2.8650479989254206e-05, "loss": 1.2632, "step": 1792 }, { "epoch": 0.38, "learning_rate": 2.8638192012450115e-05, "loss": 1.266, "step": 1793 }, { "epoch": 0.38, "learning_rate": 2.862590002575714e-05, "loss": 1.2915, "step": 1794 }, { "epoch": 0.38, "learning_rate": 2.861360403488129e-05, "loss": 1.3058, "step": 1795 }, { "epoch": 0.38, "learning_rate": 2.8601304045530414e-05, "loss": 1.2763, "step": 1796 }, { "epoch": 0.38, "learning_rate": 2.858900006341422e-05, "loss": 1.2801, "step": 1797 }, { "epoch": 0.38, "learning_rate": 2.8576692094244286e-05, "loss": 1.2901, "step": 1798 }, { "epoch": 0.38, "learning_rate": 2.856438014373402e-05, "loss": 1.2949, "step": 1799 }, { "epoch": 0.38, "learning_rate": 2.8552064217598697e-05, "loss": 1.2914, "step": 1800 }, { "epoch": 0.38, "learning_rate": 2.853974432155541e-05, "loss": 1.3045, "step": 1801 }, { "epoch": 0.38, "learning_rate": 2.852742046132312e-05, "loss": 1.227, "step": 1802 }, { "epoch": 0.38, "learning_rate": 2.8515092642622625e-05, "loss": 1.2859, "step": 1803 }, { "epoch": 0.38, "learning_rate": 2.8502760871176546e-05, "loss": 1.2837, "step": 1804 }, { "epoch": 0.38, "learning_rate": 2.8490425152709367e-05, "loss": 1.3576, "step": 1805 }, { "epoch": 0.38, "learning_rate": 2.847808549294736e-05, "loss": 1.2636, "step": 1806 }, { "epoch": 0.38, "learning_rate": 2.8465741897618673e-05, "loss": 1.3152, "step": 1807 }, { "epoch": 0.38, "learning_rate": 2.8453394372453253e-05, "loss": 1.348, "step": 1808 }, { "epoch": 0.38, "learning_rate": 2.8441042923182872e-05, "loss": 1.2609, "step": 1809 }, { "epoch": 0.38, "learning_rate": 2.842868755554114e-05, "loss": 1.2705, "step": 1810 }, { "epoch": 0.38, "learning_rate": 2.8416328275263472e-05, "loss": 1.2899, "step": 1811 }, { "epoch": 0.38, "learning_rate": 2.8403965088087105e-05, "loss": 1.3033, "step": 1812 }, { "epoch": 0.38, "learning_rate": 2.839159799975109e-05, "loss": 1.3166, "step": 1813 }, { "epoch": 0.38, "learning_rate": 2.8379227015996283e-05, "loss": 1.2699, "step": 1814 }, { "epoch": 0.38, "learning_rate": 2.8366852142565352e-05, "loss": 1.2821, "step": 1815 }, { "epoch": 0.38, "learning_rate": 2.8354473385202772e-05, "loss": 1.2856, "step": 1816 }, { "epoch": 0.38, "learning_rate": 2.834209074965482e-05, "loss": 1.3096, "step": 1817 }, { "epoch": 0.38, "learning_rate": 2.8329704241669574e-05, "loss": 1.2801, "step": 1818 }, { "epoch": 0.38, "learning_rate": 2.8317313866996897e-05, "loss": 1.2775, "step": 1819 }, { "epoch": 0.38, "learning_rate": 2.830491963138848e-05, "loss": 1.2593, "step": 1820 }, { "epoch": 0.38, "learning_rate": 2.8292521540597767e-05, "loss": 1.2412, "step": 1821 }, { "epoch": 0.38, "learning_rate": 2.828011960038002e-05, "loss": 1.2641, "step": 1822 }, { "epoch": 0.38, "learning_rate": 2.826771381649227e-05, "loss": 1.2691, "step": 1823 }, { "epoch": 0.38, "learning_rate": 2.8255304194693343e-05, "loss": 1.2985, "step": 1824 }, { "epoch": 0.38, "learning_rate": 2.8242890740743844e-05, "loss": 1.2997, "step": 1825 }, { "epoch": 0.38, "learning_rate": 2.8230473460406154e-05, "loss": 1.3265, "step": 1826 }, { "epoch": 0.38, "learning_rate": 2.8218052359444434e-05, "loss": 1.3146, "step": 1827 }, { "epoch": 0.38, "learning_rate": 2.8205627443624616e-05, "loss": 1.3128, "step": 1828 }, { "epoch": 0.38, "learning_rate": 2.8193198718714402e-05, "loss": 1.2767, "step": 1829 }, { "epoch": 0.38, "learning_rate": 2.8180766190483263e-05, "loss": 1.2702, "step": 1830 }, { "epoch": 0.39, "learning_rate": 2.8168329864702443e-05, "loss": 1.3528, "step": 1831 }, { "epoch": 0.39, "learning_rate": 2.8155889747144933e-05, "loss": 1.2752, "step": 1832 }, { "epoch": 0.39, "learning_rate": 2.8143445843585498e-05, "loss": 1.2833, "step": 1833 }, { "epoch": 0.39, "learning_rate": 2.8130998159800663e-05, "loss": 1.3128, "step": 1834 }, { "epoch": 0.39, "learning_rate": 2.8118546701568687e-05, "loss": 1.2616, "step": 1835 }, { "epoch": 0.39, "learning_rate": 2.81060914746696e-05, "loss": 1.2878, "step": 1836 }, { "epoch": 0.39, "learning_rate": 2.8093632484885182e-05, "loss": 1.2238, "step": 1837 }, { "epoch": 0.39, "learning_rate": 2.8081169737998956e-05, "loss": 1.3228, "step": 1838 }, { "epoch": 0.39, "learning_rate": 2.8068703239796175e-05, "loss": 1.242, "step": 1839 }, { "epoch": 0.39, "learning_rate": 2.805623299606385e-05, "loss": 1.2731, "step": 1840 }, { "epoch": 0.39, "learning_rate": 2.8043759012590723e-05, "loss": 1.325, "step": 1841 }, { "epoch": 0.39, "learning_rate": 2.803128129516729e-05, "loss": 1.275, "step": 1842 }, { "epoch": 0.39, "learning_rate": 2.801879984958575e-05, "loss": 1.2973, "step": 1843 }, { "epoch": 0.39, "learning_rate": 2.800631468164005e-05, "loss": 1.3062, "step": 1844 }, { "epoch": 0.39, "learning_rate": 2.7993825797125866e-05, "loss": 1.2365, "step": 1845 }, { "epoch": 0.39, "learning_rate": 2.7981333201840595e-05, "loss": 1.269, "step": 1846 }, { "epoch": 0.39, "learning_rate": 2.7968836901583364e-05, "loss": 1.2851, "step": 1847 }, { "epoch": 0.39, "learning_rate": 2.7956336902155003e-05, "loss": 1.2497, "step": 1848 }, { "epoch": 0.39, "learning_rate": 2.7943833209358076e-05, "loss": 1.2965, "step": 1849 }, { "epoch": 0.39, "learning_rate": 2.793132582899686e-05, "loss": 1.317, "step": 1850 }, { "epoch": 0.39, "learning_rate": 2.791881476687733e-05, "loss": 1.2614, "step": 1851 }, { "epoch": 0.39, "learning_rate": 2.790630002880718e-05, "loss": 1.2413, "step": 1852 }, { "epoch": 0.39, "learning_rate": 2.7893781620595818e-05, "loss": 1.2893, "step": 1853 }, { "epoch": 0.39, "learning_rate": 2.788125954805434e-05, "loss": 1.2954, "step": 1854 }, { "epoch": 0.39, "learning_rate": 2.7868733816995553e-05, "loss": 1.3131, "step": 1855 }, { "epoch": 0.39, "learning_rate": 2.7856204433233954e-05, "loss": 1.2613, "step": 1856 }, { "epoch": 0.39, "learning_rate": 2.7843671402585747e-05, "loss": 1.2626, "step": 1857 }, { "epoch": 0.39, "learning_rate": 2.783113473086882e-05, "loss": 1.2773, "step": 1858 }, { "epoch": 0.39, "learning_rate": 2.781859442390276e-05, "loss": 1.2473, "step": 1859 }, { "epoch": 0.39, "learning_rate": 2.780605048750883e-05, "loss": 1.3302, "step": 1860 }, { "epoch": 0.39, "learning_rate": 2.7793502927509988e-05, "loss": 1.2935, "step": 1861 }, { "epoch": 0.39, "learning_rate": 2.7780951749730864e-05, "loss": 1.3324, "step": 1862 }, { "epoch": 0.39, "learning_rate": 2.7768396959997783e-05, "loss": 1.3192, "step": 1863 }, { "epoch": 0.39, "learning_rate": 2.7755838564138722e-05, "loss": 1.2569, "step": 1864 }, { "epoch": 0.39, "learning_rate": 2.7743276567983354e-05, "loss": 1.247, "step": 1865 }, { "epoch": 0.39, "learning_rate": 2.7730710977363023e-05, "loss": 1.3006, "step": 1866 }, { "epoch": 0.39, "learning_rate": 2.771814179811073e-05, "loss": 1.284, "step": 1867 }, { "epoch": 0.39, "learning_rate": 2.7705569036061137e-05, "loss": 1.2943, "step": 1868 }, { "epoch": 0.39, "learning_rate": 2.7692992697050587e-05, "loss": 1.3039, "step": 1869 }, { "epoch": 0.39, "learning_rate": 2.7680412786917074e-05, "loss": 1.2616, "step": 1870 }, { "epoch": 0.39, "learning_rate": 2.7667829311500255e-05, "loss": 1.2808, "step": 1871 }, { "epoch": 0.39, "learning_rate": 2.765524227664143e-05, "loss": 1.2543, "step": 1872 }, { "epoch": 0.39, "learning_rate": 2.7642651688183558e-05, "loss": 1.2762, "step": 1873 }, { "epoch": 0.39, "learning_rate": 2.763005755197126e-05, "loss": 1.2933, "step": 1874 }, { "epoch": 0.39, "learning_rate": 2.7617459873850792e-05, "loss": 1.3022, "step": 1875 }, { "epoch": 0.39, "learning_rate": 2.760485865967004e-05, "loss": 1.2794, "step": 1876 }, { "epoch": 0.39, "learning_rate": 2.7592253915278556e-05, "loss": 1.2269, "step": 1877 }, { "epoch": 0.4, "learning_rate": 2.7579645646527522e-05, "loss": 1.2871, "step": 1878 }, { "epoch": 0.4, "learning_rate": 2.7567033859269754e-05, "loss": 1.2604, "step": 1879 }, { "epoch": 0.4, "learning_rate": 2.75544185593597e-05, "loss": 1.2772, "step": 1880 }, { "epoch": 0.4, "learning_rate": 2.754179975265344e-05, "loss": 1.2776, "step": 1881 }, { "epoch": 0.4, "learning_rate": 2.752917744500868e-05, "loss": 1.2773, "step": 1882 }, { "epoch": 0.4, "learning_rate": 2.7516551642284765e-05, "loss": 1.2553, "step": 1883 }, { "epoch": 0.4, "learning_rate": 2.7503922350342645e-05, "loss": 1.2927, "step": 1884 }, { "epoch": 0.4, "learning_rate": 2.7491289575044893e-05, "loss": 1.2888, "step": 1885 }, { "epoch": 0.4, "learning_rate": 2.7478653322255707e-05, "loss": 1.2425, "step": 1886 }, { "epoch": 0.4, "learning_rate": 2.746601359784089e-05, "loss": 1.2898, "step": 1887 }, { "epoch": 0.4, "learning_rate": 2.745337040766787e-05, "loss": 1.2953, "step": 1888 }, { "epoch": 0.4, "learning_rate": 2.744072375760566e-05, "loss": 1.3026, "step": 1889 }, { "epoch": 0.4, "learning_rate": 2.74280736535249e-05, "loss": 1.3024, "step": 1890 }, { "epoch": 0.4, "learning_rate": 2.7415420101297836e-05, "loss": 1.2661, "step": 1891 }, { "epoch": 0.4, "learning_rate": 2.7402763106798295e-05, "loss": 1.3313, "step": 1892 }, { "epoch": 0.4, "learning_rate": 2.739010267590171e-05, "loss": 1.323, "step": 1893 }, { "epoch": 0.4, "learning_rate": 2.7377438814485117e-05, "loss": 1.2621, "step": 1894 }, { "epoch": 0.4, "learning_rate": 2.7364771528427145e-05, "loss": 1.2979, "step": 1895 }, { "epoch": 0.4, "learning_rate": 2.7352100823608006e-05, "loss": 1.2774, "step": 1896 }, { "epoch": 0.4, "learning_rate": 2.733942670590949e-05, "loss": 1.3335, "step": 1897 }, { "epoch": 0.4, "learning_rate": 2.7326749181214992e-05, "loss": 1.2822, "step": 1898 }, { "epoch": 0.4, "learning_rate": 2.7314068255409466e-05, "loss": 1.2991, "step": 1899 }, { "epoch": 0.4, "learning_rate": 2.7301383934379475e-05, "loss": 1.2451, "step": 1900 }, { "epoch": 0.4, "learning_rate": 2.7288696224013124e-05, "loss": 1.3066, "step": 1901 }, { "epoch": 0.4, "learning_rate": 2.727600513020011e-05, "loss": 1.2819, "step": 1902 }, { "epoch": 0.4, "learning_rate": 2.7263310658831697e-05, "loss": 1.2558, "step": 1903 }, { "epoch": 0.4, "learning_rate": 2.725061281580073e-05, "loss": 1.3023, "step": 1904 }, { "epoch": 0.4, "learning_rate": 2.7237911607001586e-05, "loss": 1.2866, "step": 1905 }, { "epoch": 0.4, "learning_rate": 2.722520703833024e-05, "loss": 1.2923, "step": 1906 }, { "epoch": 0.4, "learning_rate": 2.7212499115684204e-05, "loss": 1.2528, "step": 1907 }, { "epoch": 0.4, "learning_rate": 2.719978784496257e-05, "loss": 1.2347, "step": 1908 }, { "epoch": 0.4, "learning_rate": 2.718707323206595e-05, "loss": 1.2529, "step": 1909 }, { "epoch": 0.4, "learning_rate": 2.717435528289653e-05, "loss": 1.2897, "step": 1910 }, { "epoch": 0.4, "learning_rate": 2.7161634003358056e-05, "loss": 1.2463, "step": 1911 }, { "epoch": 0.4, "learning_rate": 2.7148909399355785e-05, "loss": 1.2654, "step": 1912 }, { "epoch": 0.4, "learning_rate": 2.713618147679655e-05, "loss": 1.259, "step": 1913 }, { "epoch": 0.4, "learning_rate": 2.712345024158871e-05, "loss": 1.2387, "step": 1914 }, { "epoch": 0.4, "learning_rate": 2.711071569964216e-05, "loss": 1.3236, "step": 1915 }, { "epoch": 0.4, "learning_rate": 2.7097977856868336e-05, "loss": 1.3116, "step": 1916 }, { "epoch": 0.4, "learning_rate": 2.70852367191802e-05, "loss": 1.3156, "step": 1917 }, { "epoch": 0.4, "learning_rate": 2.707249229249225e-05, "loss": 1.2957, "step": 1918 }, { "epoch": 0.4, "learning_rate": 2.7059744582720515e-05, "loss": 1.2634, "step": 1919 }, { "epoch": 0.4, "learning_rate": 2.7046993595782532e-05, "loss": 1.2888, "step": 1920 }, { "epoch": 0.4, "learning_rate": 2.7034239337597378e-05, "loss": 1.2981, "step": 1921 }, { "epoch": 0.4, "learning_rate": 2.7021481814085622e-05, "loss": 1.1841, "step": 1922 }, { "epoch": 0.4, "learning_rate": 2.7008721031169378e-05, "loss": 1.297, "step": 1923 }, { "epoch": 0.4, "learning_rate": 2.699595699477226e-05, "loss": 1.2748, "step": 1924 }, { "epoch": 0.4, "learning_rate": 2.6983189710819396e-05, "loss": 1.282, "step": 1925 }, { "epoch": 0.41, "learning_rate": 2.697041918523741e-05, "loss": 1.3256, "step": 1926 }, { "epoch": 0.41, "learning_rate": 2.6957645423954438e-05, "loss": 1.2638, "step": 1927 }, { "epoch": 0.41, "learning_rate": 2.694486843290013e-05, "loss": 1.2963, "step": 1928 }, { "epoch": 0.41, "learning_rate": 2.6932088218005623e-05, "loss": 1.277, "step": 1929 }, { "epoch": 0.41, "learning_rate": 2.6919304785203543e-05, "loss": 1.2888, "step": 1930 }, { "epoch": 0.41, "learning_rate": 2.6906518140428027e-05, "loss": 1.2757, "step": 1931 }, { "epoch": 0.41, "learning_rate": 2.6893728289614693e-05, "loss": 1.286, "step": 1932 }, { "epoch": 0.41, "learning_rate": 2.688093523870065e-05, "loss": 1.3027, "step": 1933 }, { "epoch": 0.41, "learning_rate": 2.6868138993624486e-05, "loss": 1.3461, "step": 1934 }, { "epoch": 0.41, "learning_rate": 2.6855339560326284e-05, "loss": 1.2842, "step": 1935 }, { "epoch": 0.41, "learning_rate": 2.6842536944747597e-05, "loss": 1.3366, "step": 1936 }, { "epoch": 0.41, "learning_rate": 2.682973115283146e-05, "loss": 1.2429, "step": 1937 }, { "epoch": 0.41, "learning_rate": 2.6816922190522386e-05, "loss": 1.2845, "step": 1938 }, { "epoch": 0.41, "learning_rate": 2.6804110063766345e-05, "loss": 1.3069, "step": 1939 }, { "epoch": 0.41, "learning_rate": 2.679129477851079e-05, "loss": 1.2499, "step": 1940 }, { "epoch": 0.41, "learning_rate": 2.6778476340704636e-05, "loss": 1.2346, "step": 1941 }, { "epoch": 0.41, "learning_rate": 2.6765654756298264e-05, "loss": 1.2508, "step": 1942 }, { "epoch": 0.41, "learning_rate": 2.675283003124351e-05, "loss": 1.2743, "step": 1943 }, { "epoch": 0.41, "learning_rate": 2.6740002171493676e-05, "loss": 1.274, "step": 1944 }, { "epoch": 0.41, "learning_rate": 2.6727171183003502e-05, "loss": 1.2814, "step": 1945 }, { "epoch": 0.41, "learning_rate": 2.6714337071729207e-05, "loss": 1.3031, "step": 1946 }, { "epoch": 0.41, "learning_rate": 2.6701499843628443e-05, "loss": 1.2805, "step": 1947 }, { "epoch": 0.41, "learning_rate": 2.66886595046603e-05, "loss": 1.2729, "step": 1948 }, { "epoch": 0.41, "learning_rate": 2.6675816060785327e-05, "loss": 1.312, "step": 1949 }, { "epoch": 0.41, "learning_rate": 2.666296951796552e-05, "loss": 1.2742, "step": 1950 }, { "epoch": 0.41, "learning_rate": 2.6650119882164292e-05, "loss": 1.292, "step": 1951 }, { "epoch": 0.41, "learning_rate": 2.663726715934651e-05, "loss": 1.2969, "step": 1952 }, { "epoch": 0.41, "learning_rate": 2.6624411355478463e-05, "loss": 1.2734, "step": 1953 }, { "epoch": 0.41, "learning_rate": 2.661155247652788e-05, "loss": 1.293, "step": 1954 }, { "epoch": 0.41, "learning_rate": 2.6598690528463916e-05, "loss": 1.2688, "step": 1955 }, { "epoch": 0.41, "learning_rate": 2.6585825517257133e-05, "loss": 1.2413, "step": 1956 }, { "epoch": 0.41, "learning_rate": 2.6572957448879547e-05, "loss": 1.2887, "step": 1957 }, { "epoch": 0.41, "learning_rate": 2.656008632930456e-05, "loss": 1.318, "step": 1958 }, { "epoch": 0.41, "learning_rate": 2.654721216450701e-05, "loss": 1.267, "step": 1959 }, { "epoch": 0.41, "learning_rate": 2.653433496046315e-05, "loss": 1.265, "step": 1960 }, { "epoch": 0.41, "learning_rate": 2.652145472315063e-05, "loss": 1.2791, "step": 1961 }, { "epoch": 0.41, "learning_rate": 2.650857145854852e-05, "loss": 1.2736, "step": 1962 }, { "epoch": 0.41, "learning_rate": 2.6495685172637292e-05, "loss": 1.3451, "step": 1963 }, { "epoch": 0.41, "learning_rate": 2.6482795871398815e-05, "loss": 1.279, "step": 1964 }, { "epoch": 0.41, "learning_rate": 2.646990356081637e-05, "loss": 1.2973, "step": 1965 }, { "epoch": 0.41, "learning_rate": 2.645700824687462e-05, "loss": 1.278, "step": 1966 }, { "epoch": 0.41, "learning_rate": 2.644410993555963e-05, "loss": 1.2729, "step": 1967 }, { "epoch": 0.41, "learning_rate": 2.643120863285886e-05, "loss": 1.3152, "step": 1968 }, { "epoch": 0.41, "learning_rate": 2.6418304344761165e-05, "loss": 1.3012, "step": 1969 }, { "epoch": 0.41, "learning_rate": 2.6405397077256752e-05, "loss": 1.2313, "step": 1970 }, { "epoch": 0.41, "learning_rate": 2.6392486836337256e-05, "loss": 1.2733, "step": 1971 }, { "epoch": 0.41, "learning_rate": 2.637957362799566e-05, "loss": 1.2687, "step": 1972 }, { "epoch": 0.41, "learning_rate": 2.636665745822633e-05, "loss": 1.3039, "step": 1973 }, { "epoch": 0.42, "learning_rate": 2.6353738333025022e-05, "loss": 1.2624, "step": 1974 }, { "epoch": 0.42, "learning_rate": 2.6340816258388858e-05, "loss": 1.3124, "step": 1975 }, { "epoch": 0.42, "learning_rate": 2.6327891240316313e-05, "loss": 1.2835, "step": 1976 }, { "epoch": 0.42, "learning_rate": 2.6314963284807246e-05, "loss": 1.2391, "step": 1977 }, { "epoch": 0.42, "learning_rate": 2.630203239786287e-05, "loss": 1.2639, "step": 1978 }, { "epoch": 0.42, "learning_rate": 2.628909858548577e-05, "loss": 1.285, "step": 1979 }, { "epoch": 0.42, "learning_rate": 2.6276161853679877e-05, "loss": 1.2698, "step": 1980 }, { "epoch": 0.42, "learning_rate": 2.626322220845048e-05, "loss": 1.2697, "step": 1981 }, { "epoch": 0.42, "learning_rate": 2.6250279655804232e-05, "loss": 1.3049, "step": 1982 }, { "epoch": 0.42, "learning_rate": 2.6237334201749126e-05, "loss": 1.3179, "step": 1983 }, { "epoch": 0.42, "learning_rate": 2.6224385852294484e-05, "loss": 1.3303, "step": 1984 }, { "epoch": 0.42, "learning_rate": 2.6211434613451006e-05, "loss": 1.2867, "step": 1985 }, { "epoch": 0.42, "learning_rate": 2.6198480491230712e-05, "loss": 1.2793, "step": 1986 }, { "epoch": 0.42, "learning_rate": 2.618552349164697e-05, "loss": 1.2727, "step": 1987 }, { "epoch": 0.42, "learning_rate": 2.6172563620714475e-05, "loss": 1.2906, "step": 1988 }, { "epoch": 0.42, "learning_rate": 2.6159600884449258e-05, "loss": 1.266, "step": 1989 }, { "epoch": 0.42, "learning_rate": 2.6146635288868685e-05, "loss": 1.2422, "step": 1990 }, { "epoch": 0.42, "learning_rate": 2.6133666839991444e-05, "loss": 1.2764, "step": 1991 }, { "epoch": 0.42, "learning_rate": 2.612069554383755e-05, "loss": 1.2452, "step": 1992 }, { "epoch": 0.42, "learning_rate": 2.6107721406428338e-05, "loss": 1.3092, "step": 1993 }, { "epoch": 0.42, "learning_rate": 2.6094744433786467e-05, "loss": 1.2806, "step": 1994 }, { "epoch": 0.42, "learning_rate": 2.6081764631935896e-05, "loss": 1.2924, "step": 1995 }, { "epoch": 0.42, "learning_rate": 2.606878200690193e-05, "loss": 1.2903, "step": 1996 }, { "epoch": 0.42, "learning_rate": 2.605579656471115e-05, "loss": 1.2397, "step": 1997 }, { "epoch": 0.42, "learning_rate": 2.6042808311391456e-05, "loss": 1.2608, "step": 1998 }, { "epoch": 0.42, "learning_rate": 2.6029817252972064e-05, "loss": 1.2555, "step": 1999 }, { "epoch": 0.42, "learning_rate": 2.6016823395483482e-05, "loss": 1.2567, "step": 2000 }, { "epoch": 0.42, "learning_rate": 2.600382674495751e-05, "loss": 1.3133, "step": 2001 }, { "epoch": 0.42, "learning_rate": 2.5990827307427263e-05, "loss": 1.3379, "step": 2002 }, { "epoch": 0.42, "learning_rate": 2.5977825088927135e-05, "loss": 1.1998, "step": 2003 }, { "epoch": 0.42, "learning_rate": 2.5964820095492825e-05, "loss": 1.276, "step": 2004 }, { "epoch": 0.42, "learning_rate": 2.5951812333161298e-05, "loss": 1.2713, "step": 2005 }, { "epoch": 0.42, "learning_rate": 2.593880180797083e-05, "loss": 1.2559, "step": 2006 }, { "epoch": 0.42, "learning_rate": 2.5925788525960964e-05, "loss": 1.3017, "step": 2007 }, { "epoch": 0.42, "learning_rate": 2.5912772493172523e-05, "loss": 1.3025, "step": 2008 }, { "epoch": 0.42, "learning_rate": 2.5899753715647614e-05, "loss": 1.3044, "step": 2009 }, { "epoch": 0.42, "learning_rate": 2.5886732199429606e-05, "loss": 1.2433, "step": 2010 }, { "epoch": 0.42, "learning_rate": 2.587370795056315e-05, "loss": 1.2535, "step": 2011 }, { "epoch": 0.42, "learning_rate": 2.5860680975094178e-05, "loss": 1.2794, "step": 2012 }, { "epoch": 0.42, "learning_rate": 2.5847651279069847e-05, "loss": 1.2642, "step": 2013 }, { "epoch": 0.42, "learning_rate": 2.5834618868538623e-05, "loss": 1.3007, "step": 2014 }, { "epoch": 0.42, "learning_rate": 2.58215837495502e-05, "loss": 1.2566, "step": 2015 }, { "epoch": 0.42, "learning_rate": 2.5808545928155547e-05, "loss": 1.3239, "step": 2016 }, { "epoch": 0.42, "learning_rate": 2.5795505410406878e-05, "loss": 1.2546, "step": 2017 }, { "epoch": 0.42, "learning_rate": 2.5782462202357664e-05, "loss": 1.2982, "step": 2018 }, { "epoch": 0.42, "learning_rate": 2.5769416310062622e-05, "loss": 1.3559, "step": 2019 }, { "epoch": 0.42, "learning_rate": 2.5756367739577713e-05, "loss": 1.259, "step": 2020 }, { "epoch": 0.43, "learning_rate": 2.5743316496960154e-05, "loss": 1.3018, "step": 2021 }, { "epoch": 0.43, "learning_rate": 2.573026258826838e-05, "loss": 1.2526, "step": 2022 }, { "epoch": 0.43, "learning_rate": 2.571720601956208e-05, "loss": 1.2777, "step": 2023 }, { "epoch": 0.43, "learning_rate": 2.570414679690218e-05, "loss": 1.2883, "step": 2024 }, { "epoch": 0.43, "learning_rate": 2.5691084926350825e-05, "loss": 1.2386, "step": 2025 }, { "epoch": 0.43, "learning_rate": 2.56780204139714e-05, "loss": 1.2968, "step": 2026 }, { "epoch": 0.43, "learning_rate": 2.5664953265828504e-05, "loss": 1.2461, "step": 2027 }, { "epoch": 0.43, "learning_rate": 2.565188348798798e-05, "loss": 1.2403, "step": 2028 }, { "epoch": 0.43, "learning_rate": 2.5638811086516873e-05, "loss": 1.2555, "step": 2029 }, { "epoch": 0.43, "learning_rate": 2.562573606748345e-05, "loss": 1.2388, "step": 2030 }, { "epoch": 0.43, "learning_rate": 2.5612658436957204e-05, "loss": 1.2985, "step": 2031 }, { "epoch": 0.43, "learning_rate": 2.5599578201008824e-05, "loss": 1.3127, "step": 2032 }, { "epoch": 0.43, "learning_rate": 2.5586495365710225e-05, "loss": 1.297, "step": 2033 }, { "epoch": 0.43, "learning_rate": 2.5573409937134508e-05, "loss": 1.2838, "step": 2034 }, { "epoch": 0.43, "learning_rate": 2.5560321921355996e-05, "loss": 1.2908, "step": 2035 }, { "epoch": 0.43, "learning_rate": 2.554723132445021e-05, "loss": 1.2932, "step": 2036 }, { "epoch": 0.43, "learning_rate": 2.5534138152493863e-05, "loss": 1.2327, "step": 2037 }, { "epoch": 0.43, "learning_rate": 2.5521042411564866e-05, "loss": 1.2842, "step": 2038 }, { "epoch": 0.43, "learning_rate": 2.5507944107742314e-05, "loss": 1.2342, "step": 2039 }, { "epoch": 0.43, "learning_rate": 2.549484324710652e-05, "loss": 1.3055, "step": 2040 }, { "epoch": 0.43, "learning_rate": 2.548173983573895e-05, "loss": 1.2898, "step": 2041 }, { "epoch": 0.43, "learning_rate": 2.5468633879722272e-05, "loss": 1.2622, "step": 2042 }, { "epoch": 0.43, "learning_rate": 2.545552538514033e-05, "loss": 1.3072, "step": 2043 }, { "epoch": 0.43, "learning_rate": 2.5442414358078148e-05, "loss": 1.2473, "step": 2044 }, { "epoch": 0.43, "learning_rate": 2.5429300804621934e-05, "loss": 1.2588, "step": 2045 }, { "epoch": 0.43, "learning_rate": 2.541618473085905e-05, "loss": 1.293, "step": 2046 }, { "epoch": 0.43, "learning_rate": 2.5403066142878047e-05, "loss": 1.2812, "step": 2047 }, { "epoch": 0.43, "learning_rate": 2.538994504676862e-05, "loss": 1.2819, "step": 2048 }, { "epoch": 0.43, "learning_rate": 2.537682144862166e-05, "loss": 1.2955, "step": 2049 }, { "epoch": 0.43, "learning_rate": 2.53636953545292e-05, "loss": 1.2705, "step": 2050 }, { "epoch": 0.43, "learning_rate": 2.5350566770584423e-05, "loss": 1.303, "step": 2051 }, { "epoch": 0.43, "learning_rate": 2.5337435702881683e-05, "loss": 1.2637, "step": 2052 }, { "epoch": 0.43, "learning_rate": 2.5324302157516486e-05, "loss": 1.2481, "step": 2053 }, { "epoch": 0.43, "learning_rate": 2.531116614058548e-05, "loss": 1.3105, "step": 2054 }, { "epoch": 0.43, "learning_rate": 2.5298027658186472e-05, "loss": 1.2549, "step": 2055 }, { "epoch": 0.43, "learning_rate": 2.52848867164184e-05, "loss": 1.2724, "step": 2056 }, { "epoch": 0.43, "learning_rate": 2.5271743321381354e-05, "loss": 1.2496, "step": 2057 }, { "epoch": 0.43, "learning_rate": 2.525859747917656e-05, "loss": 1.2614, "step": 2058 }, { "epoch": 0.43, "learning_rate": 2.524544919590638e-05, "loss": 1.2755, "step": 2059 }, { "epoch": 0.43, "learning_rate": 2.5232298477674297e-05, "loss": 1.2589, "step": 2060 }, { "epoch": 0.43, "learning_rate": 2.5219145330584945e-05, "loss": 1.2813, "step": 2061 }, { "epoch": 0.43, "learning_rate": 2.5205989760744084e-05, "loss": 1.2259, "step": 2062 }, { "epoch": 0.43, "learning_rate": 2.5192831774258575e-05, "loss": 1.2194, "step": 2063 }, { "epoch": 0.43, "learning_rate": 2.5179671377236422e-05, "loss": 1.2996, "step": 2064 }, { "epoch": 0.43, "learning_rate": 2.516650857578674e-05, "loss": 1.2554, "step": 2065 }, { "epoch": 0.43, "learning_rate": 2.515334337601977e-05, "loss": 1.2868, "step": 2066 }, { "epoch": 0.43, "learning_rate": 2.5140175784046858e-05, "loss": 1.2312, "step": 2067 }, { "epoch": 0.43, "learning_rate": 2.512700580598045e-05, "loss": 1.3, "step": 2068 }, { "epoch": 0.44, "learning_rate": 2.5113833447934126e-05, "loss": 1.3146, "step": 2069 }, { "epoch": 0.44, "learning_rate": 2.510065871602255e-05, "loss": 1.2799, "step": 2070 }, { "epoch": 0.44, "learning_rate": 2.5087481616361493e-05, "loss": 1.2765, "step": 2071 }, { "epoch": 0.44, "learning_rate": 2.5074302155067823e-05, "loss": 1.2673, "step": 2072 }, { "epoch": 0.44, "learning_rate": 2.5061120338259512e-05, "loss": 1.2831, "step": 2073 }, { "epoch": 0.44, "learning_rate": 2.5047936172055613e-05, "loss": 1.2831, "step": 2074 }, { "epoch": 0.44, "learning_rate": 2.5034749662576293e-05, "loss": 1.3215, "step": 2075 }, { "epoch": 0.44, "learning_rate": 2.5021560815942777e-05, "loss": 1.2723, "step": 2076 }, { "epoch": 0.44, "learning_rate": 2.5008369638277382e-05, "loss": 1.2905, "step": 2077 }, { "epoch": 0.44, "learning_rate": 2.4995176135703533e-05, "loss": 1.2612, "step": 2078 }, { "epoch": 0.44, "learning_rate": 2.498198031434571e-05, "loss": 1.3296, "step": 2079 }, { "epoch": 0.44, "learning_rate": 2.496878218032947e-05, "loss": 1.2701, "step": 2080 }, { "epoch": 0.44, "learning_rate": 2.495558173978145e-05, "loss": 1.2911, "step": 2081 }, { "epoch": 0.44, "learning_rate": 2.494237899882935e-05, "loss": 1.3086, "step": 2082 }, { "epoch": 0.44, "learning_rate": 2.4929173963601958e-05, "loss": 1.2568, "step": 2083 }, { "epoch": 0.44, "learning_rate": 2.4915966640229098e-05, "loss": 1.2638, "step": 2084 }, { "epoch": 0.44, "learning_rate": 2.4902757034841674e-05, "loss": 1.2551, "step": 2085 }, { "epoch": 0.44, "learning_rate": 2.4889545153571657e-05, "loss": 1.3159, "step": 2086 }, { "epoch": 0.44, "learning_rate": 2.4876331002552055e-05, "loss": 1.2429, "step": 2087 }, { "epoch": 0.44, "learning_rate": 2.4863114587916933e-05, "loss": 1.2593, "step": 2088 }, { "epoch": 0.44, "learning_rate": 2.484989591580142e-05, "loss": 1.29, "step": 2089 }, { "epoch": 0.44, "learning_rate": 2.4836674992341684e-05, "loss": 1.2493, "step": 2090 }, { "epoch": 0.44, "learning_rate": 2.4823451823674943e-05, "loss": 1.2658, "step": 2091 }, { "epoch": 0.44, "learning_rate": 2.481022641593944e-05, "loss": 1.2678, "step": 2092 }, { "epoch": 0.44, "learning_rate": 2.4796998775274482e-05, "loss": 1.2909, "step": 2093 }, { "epoch": 0.44, "learning_rate": 2.4783768907820403e-05, "loss": 1.3031, "step": 2094 }, { "epoch": 0.44, "learning_rate": 2.4770536819718562e-05, "loss": 1.2859, "step": 2095 }, { "epoch": 0.44, "learning_rate": 2.475730251711136e-05, "loss": 1.2783, "step": 2096 }, { "epoch": 0.44, "learning_rate": 2.4744066006142218e-05, "loss": 1.2627, "step": 2097 }, { "epoch": 0.44, "learning_rate": 2.4730827292955592e-05, "loss": 1.2579, "step": 2098 }, { "epoch": 0.44, "learning_rate": 2.4717586383696947e-05, "loss": 1.2372, "step": 2099 }, { "epoch": 0.44, "learning_rate": 2.470434328451278e-05, "loss": 1.2901, "step": 2100 }, { "epoch": 0.44, "learning_rate": 2.4691098001550588e-05, "loss": 1.2395, "step": 2101 }, { "epoch": 0.44, "learning_rate": 2.4677850540958906e-05, "loss": 1.2388, "step": 2102 }, { "epoch": 0.44, "learning_rate": 2.4664600908887272e-05, "loss": 1.2986, "step": 2103 }, { "epoch": 0.44, "learning_rate": 2.4651349111486212e-05, "loss": 1.3011, "step": 2104 }, { "epoch": 0.44, "learning_rate": 2.4638095154907276e-05, "loss": 1.3088, "step": 2105 }, { "epoch": 0.44, "learning_rate": 2.4624839045303014e-05, "loss": 1.3081, "step": 2106 }, { "epoch": 0.44, "learning_rate": 2.4611580788826973e-05, "loss": 1.2722, "step": 2107 }, { "epoch": 0.44, "learning_rate": 2.4598320391633702e-05, "loss": 1.2768, "step": 2108 }, { "epoch": 0.44, "learning_rate": 2.4585057859878732e-05, "loss": 1.3224, "step": 2109 }, { "epoch": 0.44, "learning_rate": 2.4571793199718593e-05, "loss": 1.3039, "step": 2110 }, { "epoch": 0.44, "learning_rate": 2.4558526417310805e-05, "loss": 1.2917, "step": 2111 }, { "epoch": 0.44, "learning_rate": 2.4545257518813866e-05, "loss": 1.2374, "step": 2112 }, { "epoch": 0.44, "learning_rate": 2.4531986510387268e-05, "loss": 1.2449, "step": 2113 }, { "epoch": 0.44, "learning_rate": 2.4518713398191464e-05, "loss": 1.2528, "step": 2114 }, { "epoch": 0.44, "learning_rate": 2.45054381883879e-05, "loss": 1.2509, "step": 2115 }, { "epoch": 0.45, "learning_rate": 2.4492160887138998e-05, "loss": 1.2794, "step": 2116 }, { "epoch": 0.45, "learning_rate": 2.447888150060813e-05, "loss": 1.2612, "step": 2117 }, { "epoch": 0.45, "learning_rate": 2.4465600034959654e-05, "loss": 1.2718, "step": 2118 }, { "epoch": 0.45, "learning_rate": 2.4452316496358885e-05, "loss": 1.313, "step": 2119 }, { "epoch": 0.45, "learning_rate": 2.443903089097211e-05, "loss": 1.2596, "step": 2120 }, { "epoch": 0.45, "learning_rate": 2.4425743224966567e-05, "loss": 1.267, "step": 2121 }, { "epoch": 0.45, "learning_rate": 2.4412453504510447e-05, "loss": 1.2455, "step": 2122 }, { "epoch": 0.45, "learning_rate": 2.43991617357729e-05, "loss": 1.3376, "step": 2123 }, { "epoch": 0.45, "learning_rate": 2.4385867924924037e-05, "loss": 1.3297, "step": 2124 }, { "epoch": 0.45, "learning_rate": 2.43725720781349e-05, "loss": 1.2579, "step": 2125 }, { "epoch": 0.45, "learning_rate": 2.4359274201577478e-05, "loss": 1.3401, "step": 2126 }, { "epoch": 0.45, "learning_rate": 2.4345974301424717e-05, "loss": 1.2648, "step": 2127 }, { "epoch": 0.45, "learning_rate": 2.433267238385048e-05, "loss": 1.2432, "step": 2128 }, { "epoch": 0.45, "learning_rate": 2.4319368455029598e-05, "loss": 1.2588, "step": 2129 }, { "epoch": 0.45, "learning_rate": 2.43060625211378e-05, "loss": 1.288, "step": 2130 }, { "epoch": 0.45, "learning_rate": 2.4292754588351768e-05, "loss": 1.2282, "step": 2131 }, { "epoch": 0.45, "learning_rate": 2.42794446628491e-05, "loss": 1.293, "step": 2132 }, { "epoch": 0.45, "learning_rate": 2.426613275080834e-05, "loss": 1.307, "step": 2133 }, { "epoch": 0.45, "learning_rate": 2.4252818858408923e-05, "loss": 1.3079, "step": 2134 }, { "epoch": 0.45, "learning_rate": 2.4239502991831233e-05, "loss": 1.284, "step": 2135 }, { "epoch": 0.45, "learning_rate": 2.4226185157256546e-05, "loss": 1.2037, "step": 2136 }, { "epoch": 0.45, "learning_rate": 2.421286536086707e-05, "loss": 1.2519, "step": 2137 }, { "epoch": 0.45, "learning_rate": 2.4199543608845916e-05, "loss": 1.2597, "step": 2138 }, { "epoch": 0.45, "learning_rate": 2.4186219907377097e-05, "loss": 1.2732, "step": 2139 }, { "epoch": 0.45, "learning_rate": 2.4172894262645544e-05, "loss": 1.3214, "step": 2140 }, { "epoch": 0.45, "learning_rate": 2.4159566680837086e-05, "loss": 1.278, "step": 2141 }, { "epoch": 0.45, "learning_rate": 2.414623716813844e-05, "loss": 1.2699, "step": 2142 }, { "epoch": 0.45, "learning_rate": 2.413290573073723e-05, "loss": 1.3015, "step": 2143 }, { "epoch": 0.45, "learning_rate": 2.4119572374821968e-05, "loss": 1.2712, "step": 2144 }, { "epoch": 0.45, "learning_rate": 2.4106237106582072e-05, "loss": 1.2545, "step": 2145 }, { "epoch": 0.45, "learning_rate": 2.4092899932207824e-05, "loss": 1.2726, "step": 2146 }, { "epoch": 0.45, "learning_rate": 2.4079560857890405e-05, "loss": 1.2493, "step": 2147 }, { "epoch": 0.45, "learning_rate": 2.406621988982188e-05, "loss": 1.2737, "step": 2148 }, { "epoch": 0.45, "learning_rate": 2.405287703419518e-05, "loss": 1.2957, "step": 2149 }, { "epoch": 0.45, "learning_rate": 2.4039532297204125e-05, "loss": 1.2383, "step": 2150 }, { "epoch": 0.45, "learning_rate": 2.4026185685043405e-05, "loss": 1.2334, "step": 2151 }, { "epoch": 0.45, "learning_rate": 2.4012837203908582e-05, "loss": 1.2657, "step": 2152 }, { "epoch": 0.45, "learning_rate": 2.3999486859996073e-05, "loss": 1.2939, "step": 2153 }, { "epoch": 0.45, "learning_rate": 2.3986134659503187e-05, "loss": 1.246, "step": 2154 }, { "epoch": 0.45, "learning_rate": 2.3972780608628057e-05, "loss": 1.3431, "step": 2155 }, { "epoch": 0.45, "learning_rate": 2.3959424713569708e-05, "loss": 1.2937, "step": 2156 }, { "epoch": 0.45, "learning_rate": 2.3946066980528e-05, "loss": 1.2899, "step": 2157 }, { "epoch": 0.45, "learning_rate": 2.3932707415703673e-05, "loss": 1.2751, "step": 2158 }, { "epoch": 0.45, "learning_rate": 2.391934602529828e-05, "loss": 1.2325, "step": 2159 }, { "epoch": 0.45, "learning_rate": 2.3905982815514243e-05, "loss": 1.2628, "step": 2160 }, { "epoch": 0.45, "learning_rate": 2.3892617792554833e-05, "loss": 1.2681, "step": 2161 }, { "epoch": 0.45, "learning_rate": 2.3879250962624152e-05, "loss": 1.29, "step": 2162 }, { "epoch": 0.45, "learning_rate": 2.386588233192715e-05, "loss": 1.2623, "step": 2163 }, { "epoch": 0.46, "learning_rate": 2.38525119066696e-05, "loss": 1.29, "step": 2164 }, { "epoch": 0.46, "learning_rate": 2.3839139693058116e-05, "loss": 1.268, "step": 2165 }, { "epoch": 0.46, "learning_rate": 2.382576569730015e-05, "loss": 1.2672, "step": 2166 }, { "epoch": 0.46, "learning_rate": 2.3812389925603963e-05, "loss": 1.226, "step": 2167 }, { "epoch": 0.46, "learning_rate": 2.3799012384178654e-05, "loss": 1.2533, "step": 2168 }, { "epoch": 0.46, "learning_rate": 2.3785633079234144e-05, "loss": 1.2878, "step": 2169 }, { "epoch": 0.46, "learning_rate": 2.377225201698117e-05, "loss": 1.2776, "step": 2170 }, { "epoch": 0.46, "learning_rate": 2.3758869203631266e-05, "loss": 1.3176, "step": 2171 }, { "epoch": 0.46, "learning_rate": 2.3745484645396816e-05, "loss": 1.2473, "step": 2172 }, { "epoch": 0.46, "learning_rate": 2.373209834849098e-05, "loss": 1.2396, "step": 2173 }, { "epoch": 0.46, "learning_rate": 2.3718710319127755e-05, "loss": 1.2944, "step": 2174 }, { "epoch": 0.46, "learning_rate": 2.370532056352191e-05, "loss": 1.2887, "step": 2175 }, { "epoch": 0.46, "learning_rate": 2.3691929087889042e-05, "loss": 1.2868, "step": 2176 }, { "epoch": 0.46, "learning_rate": 2.3678535898445533e-05, "loss": 1.2609, "step": 2177 }, { "epoch": 0.46, "learning_rate": 2.3665141001408562e-05, "loss": 1.2767, "step": 2178 }, { "epoch": 0.46, "learning_rate": 2.3651744402996114e-05, "loss": 1.2674, "step": 2179 }, { "epoch": 0.46, "learning_rate": 2.3638346109426932e-05, "loss": 1.2839, "step": 2180 }, { "epoch": 0.46, "learning_rate": 2.362494612692058e-05, "loss": 1.3094, "step": 2181 }, { "epoch": 0.46, "learning_rate": 2.361154446169739e-05, "loss": 1.2948, "step": 2182 }, { "epoch": 0.46, "learning_rate": 2.3598141119978482e-05, "loss": 1.3063, "step": 2183 }, { "epoch": 0.46, "learning_rate": 2.3584736107985737e-05, "loss": 1.2767, "step": 2184 }, { "epoch": 0.46, "learning_rate": 2.357132943194183e-05, "loss": 1.2409, "step": 2185 }, { "epoch": 0.46, "learning_rate": 2.35579210980702e-05, "loss": 1.3158, "step": 2186 }, { "epoch": 0.46, "learning_rate": 2.3544511112595068e-05, "loss": 1.2893, "step": 2187 }, { "epoch": 0.46, "learning_rate": 2.3531099481741403e-05, "loss": 1.2235, "step": 2188 }, { "epoch": 0.46, "learning_rate": 2.351768621173495e-05, "loss": 1.2892, "step": 2189 }, { "epoch": 0.46, "learning_rate": 2.3504271308802204e-05, "loss": 1.2643, "step": 2190 }, { "epoch": 0.46, "learning_rate": 2.3490854779170436e-05, "loss": 1.293, "step": 2191 }, { "epoch": 0.46, "learning_rate": 2.347743662906765e-05, "loss": 1.2984, "step": 2192 }, { "epoch": 0.46, "learning_rate": 2.3464016864722625e-05, "loss": 1.2651, "step": 2193 }, { "epoch": 0.46, "learning_rate": 2.345059549236487e-05, "loss": 1.2821, "step": 2194 }, { "epoch": 0.46, "learning_rate": 2.343717251822465e-05, "loss": 1.2066, "step": 2195 }, { "epoch": 0.46, "learning_rate": 2.3423747948532976e-05, "loss": 1.2939, "step": 2196 }, { "epoch": 0.46, "learning_rate": 2.341032178952159e-05, "loss": 1.2487, "step": 2197 }, { "epoch": 0.46, "learning_rate": 2.339689404742298e-05, "loss": 1.2796, "step": 2198 }, { "epoch": 0.46, "learning_rate": 2.338346472847037e-05, "loss": 1.2045, "step": 2199 }, { "epoch": 0.46, "learning_rate": 2.3370033838897702e-05, "loss": 1.2245, "step": 2200 }, { "epoch": 0.46, "learning_rate": 2.3356601384939665e-05, "loss": 1.3075, "step": 2201 }, { "epoch": 0.46, "learning_rate": 2.3343167372831665e-05, "loss": 1.309, "step": 2202 }, { "epoch": 0.46, "learning_rate": 2.3329731808809836e-05, "loss": 1.3084, "step": 2203 }, { "epoch": 0.46, "learning_rate": 2.331629469911103e-05, "loss": 1.2633, "step": 2204 }, { "epoch": 0.46, "learning_rate": 2.330285604997281e-05, "loss": 1.2671, "step": 2205 }, { "epoch": 0.46, "learning_rate": 2.328941586763346e-05, "loss": 1.2656, "step": 2206 }, { "epoch": 0.46, "learning_rate": 2.3275974158331977e-05, "loss": 1.2829, "step": 2207 }, { "epoch": 0.46, "learning_rate": 2.3262530928308068e-05, "loss": 1.2457, "step": 2208 }, { "epoch": 0.46, "learning_rate": 2.3249086183802137e-05, "loss": 1.2854, "step": 2209 }, { "epoch": 0.46, "learning_rate": 2.32356399310553e-05, "loss": 1.2401, "step": 2210 }, { "epoch": 0.47, "learning_rate": 2.3222192176309367e-05, "loss": 1.2986, "step": 2211 }, { "epoch": 0.47, "learning_rate": 2.320874292580685e-05, "loss": 1.3068, "step": 2212 }, { "epoch": 0.47, "learning_rate": 2.3195292185790957e-05, "loss": 1.3193, "step": 2213 }, { "epoch": 0.47, "learning_rate": 2.318183996250558e-05, "loss": 1.2133, "step": 2214 }, { "epoch": 0.47, "learning_rate": 2.3168386262195307e-05, "loss": 1.2821, "step": 2215 }, { "epoch": 0.47, "learning_rate": 2.315493109110541e-05, "loss": 1.2341, "step": 2216 }, { "epoch": 0.47, "learning_rate": 2.314147445548183e-05, "loss": 1.2578, "step": 2217 }, { "epoch": 0.47, "learning_rate": 2.3128016361571213e-05, "loss": 1.3141, "step": 2218 }, { "epoch": 0.47, "learning_rate": 2.3114556815620863e-05, "loss": 1.3132, "step": 2219 }, { "epoch": 0.47, "learning_rate": 2.3101095823878764e-05, "loss": 1.3046, "step": 2220 }, { "epoch": 0.47, "learning_rate": 2.308763339259357e-05, "loss": 1.2741, "step": 2221 }, { "epoch": 0.47, "learning_rate": 2.3074169528014605e-05, "loss": 1.2767, "step": 2222 }, { "epoch": 0.47, "learning_rate": 2.306070423639186e-05, "loss": 1.264, "step": 2223 }, { "epoch": 0.47, "learning_rate": 2.3047237523975984e-05, "loss": 1.263, "step": 2224 }, { "epoch": 0.47, "learning_rate": 2.3033769397018286e-05, "loss": 1.2069, "step": 2225 }, { "epoch": 0.47, "learning_rate": 2.3020299861770732e-05, "loss": 1.2284, "step": 2226 }, { "epoch": 0.47, "learning_rate": 2.300682892448595e-05, "loss": 1.2452, "step": 2227 }, { "epoch": 0.47, "learning_rate": 2.2993356591417203e-05, "loss": 1.2717, "step": 2228 }, { "epoch": 0.47, "learning_rate": 2.2979882868818422e-05, "loss": 1.263, "step": 2229 }, { "epoch": 0.47, "learning_rate": 2.296640776294416e-05, "loss": 1.2966, "step": 2230 }, { "epoch": 0.47, "learning_rate": 2.2952931280049628e-05, "loss": 1.2989, "step": 2231 }, { "epoch": 0.47, "learning_rate": 2.293945342639067e-05, "loss": 1.2976, "step": 2232 }, { "epoch": 0.47, "learning_rate": 2.2925974208223778e-05, "loss": 1.27, "step": 2233 }, { "epoch": 0.47, "learning_rate": 2.2912493631806055e-05, "loss": 1.2463, "step": 2234 }, { "epoch": 0.47, "learning_rate": 2.2899011703395254e-05, "loss": 1.2726, "step": 2235 }, { "epoch": 0.47, "learning_rate": 2.288552842924974e-05, "loss": 1.2412, "step": 2236 }, { "epoch": 0.47, "learning_rate": 2.2872043815628525e-05, "loss": 1.2547, "step": 2237 }, { "epoch": 0.47, "learning_rate": 2.2858557868791222e-05, "loss": 1.289, "step": 2238 }, { "epoch": 0.47, "learning_rate": 2.284507059499807e-05, "loss": 1.2979, "step": 2239 }, { "epoch": 0.47, "learning_rate": 2.283158200050993e-05, "loss": 1.2521, "step": 2240 }, { "epoch": 0.47, "learning_rate": 2.2818092091588266e-05, "loss": 1.2675, "step": 2241 }, { "epoch": 0.47, "learning_rate": 2.280460087449515e-05, "loss": 1.2783, "step": 2242 }, { "epoch": 0.47, "learning_rate": 2.2791108355493278e-05, "loss": 1.2532, "step": 2243 }, { "epoch": 0.47, "learning_rate": 2.2777614540845934e-05, "loss": 1.2963, "step": 2244 }, { "epoch": 0.47, "learning_rate": 2.2764119436817015e-05, "loss": 1.2672, "step": 2245 }, { "epoch": 0.47, "learning_rate": 2.2750623049671003e-05, "loss": 1.2974, "step": 2246 }, { "epoch": 0.47, "learning_rate": 2.273712538567299e-05, "loss": 1.2806, "step": 2247 }, { "epoch": 0.47, "learning_rate": 2.2723626451088644e-05, "loss": 1.3214, "step": 2248 }, { "epoch": 0.47, "learning_rate": 2.2710126252184255e-05, "loss": 1.307, "step": 2249 }, { "epoch": 0.47, "learning_rate": 2.2696624795226662e-05, "loss": 1.2484, "step": 2250 }, { "epoch": 0.47, "learning_rate": 2.2683122086483297e-05, "loss": 1.24, "step": 2251 }, { "epoch": 0.47, "learning_rate": 2.26696181322222e-05, "loss": 1.2795, "step": 2252 }, { "epoch": 0.47, "learning_rate": 2.2656112938711952e-05, "loss": 1.2912, "step": 2253 }, { "epoch": 0.47, "learning_rate": 2.264260651222174e-05, "loss": 1.2853, "step": 2254 }, { "epoch": 0.47, "learning_rate": 2.26290988590213e-05, "loss": 1.2958, "step": 2255 }, { "epoch": 0.47, "learning_rate": 2.261558998538095e-05, "loss": 1.2503, "step": 2256 }, { "epoch": 0.47, "learning_rate": 2.2602079897571576e-05, "loss": 1.295, "step": 2257 }, { "epoch": 0.47, "learning_rate": 2.258856860186462e-05, "loss": 1.269, "step": 2258 }, { "epoch": 0.48, "learning_rate": 2.257505610453209e-05, "loss": 1.2443, "step": 2259 }, { "epoch": 0.48, "learning_rate": 2.2561542411846537e-05, "loss": 1.3001, "step": 2260 }, { "epoch": 0.48, "learning_rate": 2.25480275300811e-05, "loss": 1.2766, "step": 2261 }, { "epoch": 0.48, "learning_rate": 2.253451146550945e-05, "loss": 1.3115, "step": 2262 }, { "epoch": 0.48, "learning_rate": 2.252099422440579e-05, "loss": 1.3036, "step": 2263 }, { "epoch": 0.48, "learning_rate": 2.2507475813044896e-05, "loss": 1.3008, "step": 2264 }, { "epoch": 0.48, "learning_rate": 2.2493956237702075e-05, "loss": 1.2758, "step": 2265 }, { "epoch": 0.48, "learning_rate": 2.2480435504653185e-05, "loss": 1.2717, "step": 2266 }, { "epoch": 0.48, "learning_rate": 2.24669136201746e-05, "loss": 1.2827, "step": 2267 }, { "epoch": 0.48, "learning_rate": 2.2453390590543246e-05, "loss": 1.2494, "step": 2268 }, { "epoch": 0.48, "learning_rate": 2.243986642203658e-05, "loss": 1.2719, "step": 2269 }, { "epoch": 0.48, "learning_rate": 2.2426341120932582e-05, "loss": 1.2765, "step": 2270 }, { "epoch": 0.48, "learning_rate": 2.241281469350976e-05, "loss": 1.2572, "step": 2271 }, { "epoch": 0.48, "learning_rate": 2.2399287146047137e-05, "loss": 1.2562, "step": 2272 }, { "epoch": 0.48, "learning_rate": 2.2385758484824275e-05, "loss": 1.2728, "step": 2273 }, { "epoch": 0.48, "learning_rate": 2.2372228716121246e-05, "loss": 1.3079, "step": 2274 }, { "epoch": 0.48, "learning_rate": 2.235869784621861e-05, "loss": 1.2781, "step": 2275 }, { "epoch": 0.48, "learning_rate": 2.2345165881397475e-05, "loss": 1.298, "step": 2276 }, { "epoch": 0.48, "learning_rate": 2.2331632827939438e-05, "loss": 1.2634, "step": 2277 }, { "epoch": 0.48, "learning_rate": 2.231809869212661e-05, "loss": 1.2548, "step": 2278 }, { "epoch": 0.48, "learning_rate": 2.230456348024159e-05, "loss": 1.2609, "step": 2279 }, { "epoch": 0.48, "learning_rate": 2.2291027198567502e-05, "loss": 1.304, "step": 2280 }, { "epoch": 0.48, "learning_rate": 2.2277489853387932e-05, "loss": 1.2894, "step": 2281 }, { "epoch": 0.48, "learning_rate": 2.2263951450986987e-05, "loss": 1.2655, "step": 2282 }, { "epoch": 0.48, "learning_rate": 2.2250411997649266e-05, "loss": 1.2448, "step": 2283 }, { "epoch": 0.48, "learning_rate": 2.2236871499659824e-05, "loss": 1.3025, "step": 2284 }, { "epoch": 0.48, "learning_rate": 2.2223329963304242e-05, "loss": 1.2805, "step": 2285 }, { "epoch": 0.48, "learning_rate": 2.2209787394868562e-05, "loss": 1.2748, "step": 2286 }, { "epoch": 0.48, "learning_rate": 2.2196243800639303e-05, "loss": 1.2832, "step": 2287 }, { "epoch": 0.48, "learning_rate": 2.2182699186903462e-05, "loss": 1.2327, "step": 2288 }, { "epoch": 0.48, "learning_rate": 2.2169153559948513e-05, "loss": 1.303, "step": 2289 }, { "epoch": 0.48, "learning_rate": 2.21556069260624e-05, "loss": 1.2921, "step": 2290 }, { "epoch": 0.48, "learning_rate": 2.2142059291533542e-05, "loss": 1.2519, "step": 2291 }, { "epoch": 0.48, "learning_rate": 2.2128510662650796e-05, "loss": 1.2517, "step": 2292 }, { "epoch": 0.48, "learning_rate": 2.211496104570351e-05, "loss": 1.2842, "step": 2293 }, { "epoch": 0.48, "learning_rate": 2.210141044698148e-05, "loss": 1.2987, "step": 2294 }, { "epoch": 0.48, "learning_rate": 2.2087858872774954e-05, "loss": 1.2548, "step": 2295 }, { "epoch": 0.48, "learning_rate": 2.2074306329374636e-05, "loss": 1.3158, "step": 2296 }, { "epoch": 0.48, "learning_rate": 2.206075282307168e-05, "loss": 1.2867, "step": 2297 }, { "epoch": 0.48, "learning_rate": 2.2047198360157683e-05, "loss": 1.282, "step": 2298 }, { "epoch": 0.48, "learning_rate": 2.2033642946924698e-05, "loss": 1.2701, "step": 2299 }, { "epoch": 0.48, "learning_rate": 2.2020086589665203e-05, "loss": 1.23, "step": 2300 }, { "epoch": 0.48, "learning_rate": 2.2006529294672126e-05, "loss": 1.2628, "step": 2301 }, { "epoch": 0.48, "learning_rate": 2.1992971068238826e-05, "loss": 1.2468, "step": 2302 }, { "epoch": 0.48, "learning_rate": 2.197941191665909e-05, "loss": 1.269, "step": 2303 }, { "epoch": 0.48, "learning_rate": 2.196585184622715e-05, "loss": 1.2319, "step": 2304 }, { "epoch": 0.48, "learning_rate": 2.195229086323764e-05, "loss": 1.2721, "step": 2305 }, { "epoch": 0.49, "learning_rate": 2.193872897398564e-05, "loss": 1.2655, "step": 2306 }, { "epoch": 0.49, "learning_rate": 2.1925166184766636e-05, "loss": 1.2863, "step": 2307 }, { "epoch": 0.49, "learning_rate": 2.1911602501876546e-05, "loss": 1.2351, "step": 2308 }, { "epoch": 0.49, "learning_rate": 2.1898037931611688e-05, "loss": 1.2753, "step": 2309 }, { "epoch": 0.49, "learning_rate": 2.1884472480268806e-05, "loss": 1.2352, "step": 2310 }, { "epoch": 0.49, "learning_rate": 2.1870906154145035e-05, "loss": 1.2392, "step": 2311 }, { "epoch": 0.49, "learning_rate": 2.185733895953794e-05, "loss": 1.2358, "step": 2312 }, { "epoch": 0.49, "learning_rate": 2.1843770902745462e-05, "loss": 1.283, "step": 2313 }, { "epoch": 0.49, "learning_rate": 2.1830201990065966e-05, "loss": 1.2299, "step": 2314 }, { "epoch": 0.49, "learning_rate": 2.1816632227798196e-05, "loss": 1.3021, "step": 2315 }, { "epoch": 0.49, "learning_rate": 2.180306162224131e-05, "loss": 1.2538, "step": 2316 }, { "epoch": 0.49, "learning_rate": 2.1789490179694833e-05, "loss": 1.2436, "step": 2317 }, { "epoch": 0.49, "learning_rate": 2.1775917906458698e-05, "loss": 1.2869, "step": 2318 }, { "epoch": 0.49, "learning_rate": 2.176234480883322e-05, "loss": 1.2631, "step": 2319 }, { "epoch": 0.49, "learning_rate": 2.174877089311909e-05, "loss": 1.2608, "step": 2320 }, { "epoch": 0.49, "learning_rate": 2.1735196165617385e-05, "loss": 1.1975, "step": 2321 }, { "epoch": 0.49, "learning_rate": 2.1721620632629552e-05, "loss": 1.2808, "step": 2322 }, { "epoch": 0.49, "learning_rate": 2.1708044300457423e-05, "loss": 1.2342, "step": 2323 }, { "epoch": 0.49, "learning_rate": 2.1694467175403197e-05, "loss": 1.3237, "step": 2324 }, { "epoch": 0.49, "learning_rate": 2.1680889263769425e-05, "loss": 1.2427, "step": 2325 }, { "epoch": 0.49, "learning_rate": 2.166731057185905e-05, "loss": 1.2862, "step": 2326 }, { "epoch": 0.49, "learning_rate": 2.1653731105975355e-05, "loss": 1.3005, "step": 2327 }, { "epoch": 0.49, "learning_rate": 2.1640150872421997e-05, "loss": 1.2493, "step": 2328 }, { "epoch": 0.49, "learning_rate": 2.1626569877502985e-05, "loss": 1.2957, "step": 2329 }, { "epoch": 0.49, "learning_rate": 2.161298812752267e-05, "loss": 1.2392, "step": 2330 }, { "epoch": 0.49, "learning_rate": 2.1599405628785773e-05, "loss": 1.247, "step": 2331 }, { "epoch": 0.49, "learning_rate": 2.158582238759735e-05, "loss": 1.3058, "step": 2332 }, { "epoch": 0.49, "learning_rate": 2.15722384102628e-05, "loss": 1.281, "step": 2333 }, { "epoch": 0.49, "learning_rate": 2.1558653703087876e-05, "loss": 1.226, "step": 2334 }, { "epoch": 0.49, "learning_rate": 2.1545068272378664e-05, "loss": 1.308, "step": 2335 }, { "epoch": 0.49, "learning_rate": 2.1531482124441574e-05, "loss": 1.264, "step": 2336 }, { "epoch": 0.49, "learning_rate": 2.151789526558337e-05, "loss": 1.2489, "step": 2337 }, { "epoch": 0.49, "learning_rate": 2.1504307702111125e-05, "loss": 1.2956, "step": 2338 }, { "epoch": 0.49, "learning_rate": 2.1490719440332252e-05, "loss": 1.2503, "step": 2339 }, { "epoch": 0.49, "learning_rate": 2.147713048655449e-05, "loss": 1.2541, "step": 2340 }, { "epoch": 0.49, "learning_rate": 2.1463540847085892e-05, "loss": 1.3193, "step": 2341 }, { "epoch": 0.49, "learning_rate": 2.1449950528234828e-05, "loss": 1.3139, "step": 2342 }, { "epoch": 0.49, "learning_rate": 2.143635953630999e-05, "loss": 1.2783, "step": 2343 }, { "epoch": 0.49, "learning_rate": 2.1422767877620382e-05, "loss": 1.2414, "step": 2344 }, { "epoch": 0.49, "learning_rate": 2.1409175558475307e-05, "loss": 1.2978, "step": 2345 }, { "epoch": 0.49, "learning_rate": 2.1395582585184397e-05, "loss": 1.2643, "step": 2346 }, { "epoch": 0.49, "learning_rate": 2.138198896405756e-05, "loss": 1.2579, "step": 2347 }, { "epoch": 0.49, "learning_rate": 2.1368394701405023e-05, "loss": 1.2258, "step": 2348 }, { "epoch": 0.49, "learning_rate": 2.1354799803537312e-05, "loss": 1.2374, "step": 2349 }, { "epoch": 0.49, "learning_rate": 2.134120427676523e-05, "loss": 1.2371, "step": 2350 }, { "epoch": 0.49, "learning_rate": 2.1327608127399895e-05, "loss": 1.2696, "step": 2351 }, { "epoch": 0.49, "learning_rate": 2.1314011361752687e-05, "loss": 1.2591, "step": 2352 }, { "epoch": 0.49, "learning_rate": 2.1300413986135313e-05, "loss": 1.2424, "step": 2353 }, { "epoch": 0.5, "learning_rate": 2.128681600685971e-05, "loss": 1.3086, "step": 2354 }, { "epoch": 0.5, "learning_rate": 2.1273217430238146e-05, "loss": 1.3011, "step": 2355 }, { "epoch": 0.5, "learning_rate": 2.1259618262583122e-05, "loss": 1.2495, "step": 2356 }, { "epoch": 0.5, "learning_rate": 2.1246018510207452e-05, "loss": 1.2858, "step": 2357 }, { "epoch": 0.5, "learning_rate": 2.1232418179424204e-05, "loss": 1.2344, "step": 2358 }, { "epoch": 0.5, "learning_rate": 2.12188172765467e-05, "loss": 1.221, "step": 2359 }, { "epoch": 0.5, "learning_rate": 2.120521580788856e-05, "loss": 1.2757, "step": 2360 }, { "epoch": 0.5, "learning_rate": 2.1191613779763635e-05, "loss": 1.2317, "step": 2361 }, { "epoch": 0.5, "learning_rate": 2.1178011198486064e-05, "loss": 1.2473, "step": 2362 }, { "epoch": 0.5, "learning_rate": 2.1164408070370212e-05, "loss": 1.276, "step": 2363 }, { "epoch": 0.5, "learning_rate": 2.1150804401730724e-05, "loss": 1.2718, "step": 2364 }, { "epoch": 0.5, "learning_rate": 2.1137200198882484e-05, "loss": 1.2227, "step": 2365 }, { "epoch": 0.5, "learning_rate": 2.112359546814063e-05, "loss": 1.2493, "step": 2366 }, { "epoch": 0.5, "learning_rate": 2.110999021582053e-05, "loss": 1.276, "step": 2367 }, { "epoch": 0.5, "learning_rate": 2.1096384448237824e-05, "loss": 1.2941, "step": 2368 }, { "epoch": 0.5, "learning_rate": 2.1082778171708355e-05, "loss": 1.224, "step": 2369 }, { "epoch": 0.5, "learning_rate": 2.1069171392548226e-05, "loss": 1.2903, "step": 2370 }, { "epoch": 0.5, "learning_rate": 2.1055564117073767e-05, "loss": 1.26, "step": 2371 }, { "epoch": 0.5, "learning_rate": 2.1041956351601543e-05, "loss": 1.2548, "step": 2372 }, { "epoch": 0.5, "learning_rate": 2.1028348102448338e-05, "loss": 1.1928, "step": 2373 }, { "epoch": 0.5, "learning_rate": 2.1014739375931166e-05, "loss": 1.27, "step": 2374 }, { "epoch": 0.5, "learning_rate": 2.1001130178367256e-05, "loss": 1.2655, "step": 2375 }, { "epoch": 0.5, "learning_rate": 2.098752051607406e-05, "loss": 1.2878, "step": 2376 }, { "epoch": 0.5, "learning_rate": 2.097391039536926e-05, "loss": 1.3185, "step": 2377 }, { "epoch": 0.5, "learning_rate": 2.0960299822570728e-05, "loss": 1.2573, "step": 2378 }, { "epoch": 0.5, "learning_rate": 2.094668880399655e-05, "loss": 1.294, "step": 2379 }, { "epoch": 0.5, "learning_rate": 2.0933077345965032e-05, "loss": 1.3151, "step": 2380 }, { "epoch": 0.5, "learning_rate": 2.0919465454794672e-05, "loss": 1.2751, "step": 2381 }, { "epoch": 0.5, "learning_rate": 2.0905853136804173e-05, "loss": 1.2886, "step": 2382 }, { "epoch": 0.5, "learning_rate": 2.089224039831244e-05, "loss": 1.2623, "step": 2383 }, { "epoch": 0.5, "learning_rate": 2.087862724563857e-05, "loss": 1.3, "step": 2384 }, { "epoch": 0.5, "learning_rate": 2.0865013685101844e-05, "loss": 1.2876, "step": 2385 }, { "epoch": 0.5, "learning_rate": 2.085139972302175e-05, "loss": 1.3102, "step": 2386 }, { "epoch": 0.5, "learning_rate": 2.083778536571795e-05, "loss": 1.2676, "step": 2387 }, { "epoch": 0.5, "learning_rate": 2.0824170619510283e-05, "loss": 1.2637, "step": 2388 }, { "epoch": 0.5, "learning_rate": 2.0810555490718787e-05, "loss": 1.2419, "step": 2389 }, { "epoch": 0.5, "learning_rate": 2.0796939985663666e-05, "loss": 1.2756, "step": 2390 }, { "epoch": 0.5, "learning_rate": 2.0783324110665306e-05, "loss": 1.275, "step": 2391 }, { "epoch": 0.5, "learning_rate": 2.0769707872044242e-05, "loss": 1.2722, "step": 2392 }, { "epoch": 0.5, "learning_rate": 2.0756091276121212e-05, "loss": 1.3089, "step": 2393 }, { "epoch": 0.5, "learning_rate": 2.0742474329217094e-05, "loss": 1.314, "step": 2394 }, { "epoch": 0.5, "learning_rate": 2.0728857037652945e-05, "loss": 1.2922, "step": 2395 }, { "epoch": 0.5, "learning_rate": 2.0715239407749973e-05, "loss": 1.2915, "step": 2396 }, { "epoch": 0.5, "learning_rate": 2.070162144582954e-05, "loss": 1.2634, "step": 2397 }, { "epoch": 0.5, "learning_rate": 2.0688003158213172e-05, "loss": 1.3197, "step": 2398 }, { "epoch": 0.5, "learning_rate": 2.067438455122255e-05, "loss": 1.2978, "step": 2399 }, { "epoch": 0.5, "learning_rate": 2.0660765631179474e-05, "loss": 1.2411, "step": 2400 }, { "epoch": 0.51, "learning_rate": 2.0647146404405923e-05, "loss": 1.2427, "step": 2401 }, { "epoch": 0.51, "learning_rate": 2.0633526877224006e-05, "loss": 1.2709, "step": 2402 }, { "epoch": 0.51, "learning_rate": 2.061990705595597e-05, "loss": 1.2201, "step": 2403 }, { "epoch": 0.51, "learning_rate": 2.060628694692419e-05, "loss": 1.2977, "step": 2404 }, { "epoch": 0.51, "learning_rate": 2.0592666556451197e-05, "loss": 1.2939, "step": 2405 }, { "epoch": 0.51, "learning_rate": 2.0579045890859635e-05, "loss": 1.2681, "step": 2406 }, { "epoch": 0.51, "learning_rate": 2.0565424956472278e-05, "loss": 1.3335, "step": 2407 }, { "epoch": 0.51, "learning_rate": 2.055180375961203e-05, "loss": 1.2893, "step": 2408 }, { "epoch": 0.51, "learning_rate": 2.053818230660191e-05, "loss": 1.2555, "step": 2409 }, { "epoch": 0.51, "learning_rate": 2.052456060376506e-05, "loss": 1.3004, "step": 2410 }, { "epoch": 0.51, "learning_rate": 2.051093865742474e-05, "loss": 1.2388, "step": 2411 }, { "epoch": 0.51, "learning_rate": 2.0497316473904324e-05, "loss": 1.2878, "step": 2412 }, { "epoch": 0.51, "learning_rate": 2.048369405952729e-05, "loss": 1.2574, "step": 2413 }, { "epoch": 0.51, "learning_rate": 2.0470071420617222e-05, "loss": 1.2572, "step": 2414 }, { "epoch": 0.51, "learning_rate": 2.045644856349782e-05, "loss": 1.2595, "step": 2415 }, { "epoch": 0.51, "learning_rate": 2.0442825494492876e-05, "loss": 1.2491, "step": 2416 }, { "epoch": 0.51, "learning_rate": 2.0429202219926273e-05, "loss": 1.2555, "step": 2417 }, { "epoch": 0.51, "learning_rate": 2.0415578746122007e-05, "loss": 1.3248, "step": 2418 }, { "epoch": 0.51, "learning_rate": 2.0401955079404154e-05, "loss": 1.2744, "step": 2419 }, { "epoch": 0.51, "learning_rate": 2.0388331226096886e-05, "loss": 1.2988, "step": 2420 }, { "epoch": 0.51, "learning_rate": 2.0374707192524455e-05, "loss": 1.2343, "step": 2421 }, { "epoch": 0.51, "learning_rate": 2.036108298501121e-05, "loss": 1.3385, "step": 2422 }, { "epoch": 0.51, "learning_rate": 2.034745860988156e-05, "loss": 1.2002, "step": 2423 }, { "epoch": 0.51, "learning_rate": 2.0333834073460018e-05, "loss": 1.2588, "step": 2424 }, { "epoch": 0.51, "learning_rate": 2.032020938207114e-05, "loss": 1.2447, "step": 2425 }, { "epoch": 0.51, "learning_rate": 2.030658454203958e-05, "loss": 1.2394, "step": 2426 }, { "epoch": 0.51, "learning_rate": 2.029295955969005e-05, "loss": 1.2984, "step": 2427 }, { "epoch": 0.51, "learning_rate": 2.027933444134733e-05, "loss": 1.2739, "step": 2428 }, { "epoch": 0.51, "learning_rate": 2.0265709193336266e-05, "loss": 1.3139, "step": 2429 }, { "epoch": 0.51, "learning_rate": 2.025208382198176e-05, "loss": 1.2836, "step": 2430 }, { "epoch": 0.51, "learning_rate": 2.0238458333608766e-05, "loss": 1.3205, "step": 2431 }, { "epoch": 0.51, "learning_rate": 2.0224832734542314e-05, "loss": 1.225, "step": 2432 }, { "epoch": 0.51, "learning_rate": 2.0211207031107457e-05, "loss": 1.2714, "step": 2433 }, { "epoch": 0.51, "learning_rate": 2.0197581229629317e-05, "loss": 1.2509, "step": 2434 }, { "epoch": 0.51, "learning_rate": 2.018395533643305e-05, "loss": 1.2631, "step": 2435 }, { "epoch": 0.51, "learning_rate": 2.017032935784386e-05, "loss": 1.2623, "step": 2436 }, { "epoch": 0.51, "learning_rate": 2.0156703300186997e-05, "loss": 1.2872, "step": 2437 }, { "epoch": 0.51, "learning_rate": 2.0143077169787725e-05, "loss": 1.2804, "step": 2438 }, { "epoch": 0.51, "learning_rate": 2.012945097297137e-05, "loss": 1.2716, "step": 2439 }, { "epoch": 0.51, "learning_rate": 2.0115824716063273e-05, "loss": 1.2535, "step": 2440 }, { "epoch": 0.51, "learning_rate": 2.0102198405388806e-05, "loss": 1.2782, "step": 2441 }, { "epoch": 0.51, "learning_rate": 2.008857204727336e-05, "loss": 1.2146, "step": 2442 }, { "epoch": 0.51, "learning_rate": 2.0074945648042353e-05, "loss": 1.2816, "step": 2443 }, { "epoch": 0.51, "learning_rate": 2.0061319214021237e-05, "loss": 1.2117, "step": 2444 }, { "epoch": 0.51, "learning_rate": 2.0047692751535454e-05, "loss": 1.2778, "step": 2445 }, { "epoch": 0.51, "learning_rate": 2.0034066266910475e-05, "loss": 1.2435, "step": 2446 }, { "epoch": 0.51, "learning_rate": 2.0020439766471775e-05, "loss": 1.2687, "step": 2447 }, { "epoch": 0.51, "learning_rate": 2.000681325654484e-05, "loss": 1.2329, "step": 2448 }, { "epoch": 0.52, "learning_rate": 1.999318674345516e-05, "loss": 1.2704, "step": 2449 }, { "epoch": 0.52, "learning_rate": 1.997956023352823e-05, "loss": 1.22, "step": 2450 }, { "epoch": 0.52, "learning_rate": 1.9965933733089535e-05, "loss": 1.2474, "step": 2451 }, { "epoch": 0.52, "learning_rate": 1.995230724846455e-05, "loss": 1.1917, "step": 2452 }, { "epoch": 0.52, "learning_rate": 1.993868078597877e-05, "loss": 1.2962, "step": 2453 }, { "epoch": 0.52, "learning_rate": 1.9925054351957647e-05, "loss": 1.289, "step": 2454 }, { "epoch": 0.52, "learning_rate": 1.9911427952726644e-05, "loss": 1.2758, "step": 2455 }, { "epoch": 0.52, "learning_rate": 1.9897801594611204e-05, "loss": 1.211, "step": 2456 }, { "epoch": 0.52, "learning_rate": 1.988417528393673e-05, "loss": 1.2334, "step": 2457 }, { "epoch": 0.52, "learning_rate": 1.9870549027028635e-05, "loss": 1.2707, "step": 2458 }, { "epoch": 0.52, "learning_rate": 1.9856922830212286e-05, "loss": 1.2852, "step": 2459 }, { "epoch": 0.52, "learning_rate": 1.984329669981301e-05, "loss": 1.2086, "step": 2460 }, { "epoch": 0.52, "learning_rate": 1.9829670642156147e-05, "loss": 1.2645, "step": 2461 }, { "epoch": 0.52, "learning_rate": 1.981604466356695e-05, "loss": 1.2752, "step": 2462 }, { "epoch": 0.52, "learning_rate": 1.980241877037069e-05, "loss": 1.1866, "step": 2463 }, { "epoch": 0.52, "learning_rate": 1.9788792968892553e-05, "loss": 1.262, "step": 2464 }, { "epoch": 0.52, "learning_rate": 1.977516726545769e-05, "loss": 1.3017, "step": 2465 }, { "epoch": 0.52, "learning_rate": 1.976154166639124e-05, "loss": 1.2054, "step": 2466 }, { "epoch": 0.52, "learning_rate": 1.9747916178018246e-05, "loss": 1.2443, "step": 2467 }, { "epoch": 0.52, "learning_rate": 1.9734290806663738e-05, "loss": 1.2655, "step": 2468 }, { "epoch": 0.52, "learning_rate": 1.9720665558652676e-05, "loss": 1.2588, "step": 2469 }, { "epoch": 0.52, "learning_rate": 1.970704044030995e-05, "loss": 1.281, "step": 2470 }, { "epoch": 0.52, "learning_rate": 1.9693415457960426e-05, "loss": 1.2657, "step": 2471 }, { "epoch": 0.52, "learning_rate": 1.9679790617928872e-05, "loss": 1.2382, "step": 2472 }, { "epoch": 0.52, "learning_rate": 1.966616592653999e-05, "loss": 1.2571, "step": 2473 }, { "epoch": 0.52, "learning_rate": 1.9652541390118443e-05, "loss": 1.2527, "step": 2474 }, { "epoch": 0.52, "learning_rate": 1.963891701498879e-05, "loss": 1.2489, "step": 2475 }, { "epoch": 0.52, "learning_rate": 1.9625292807475548e-05, "loss": 1.2523, "step": 2476 }, { "epoch": 0.52, "learning_rate": 1.9611668773903124e-05, "loss": 1.2716, "step": 2477 }, { "epoch": 0.52, "learning_rate": 1.9598044920595853e-05, "loss": 1.203, "step": 2478 }, { "epoch": 0.52, "learning_rate": 1.9584421253878e-05, "loss": 1.3159, "step": 2479 }, { "epoch": 0.52, "learning_rate": 1.9570797780073737e-05, "loss": 1.2792, "step": 2480 }, { "epoch": 0.52, "learning_rate": 1.955717450550713e-05, "loss": 1.2965, "step": 2481 }, { "epoch": 0.52, "learning_rate": 1.9543551436502186e-05, "loss": 1.2812, "step": 2482 }, { "epoch": 0.52, "learning_rate": 1.9529928579382778e-05, "loss": 1.2525, "step": 2483 }, { "epoch": 0.52, "learning_rate": 1.9516305940472714e-05, "loss": 1.2945, "step": 2484 }, { "epoch": 0.52, "learning_rate": 1.9502683526095683e-05, "loss": 1.3148, "step": 2485 }, { "epoch": 0.52, "learning_rate": 1.948906134257526e-05, "loss": 1.2594, "step": 2486 }, { "epoch": 0.52, "learning_rate": 1.947543939623495e-05, "loss": 1.2805, "step": 2487 }, { "epoch": 0.52, "learning_rate": 1.9461817693398105e-05, "loss": 1.3181, "step": 2488 }, { "epoch": 0.52, "learning_rate": 1.944819624038798e-05, "loss": 1.2239, "step": 2489 }, { "epoch": 0.52, "learning_rate": 1.943457504352773e-05, "loss": 1.2322, "step": 2490 }, { "epoch": 0.52, "learning_rate": 1.942095410914037e-05, "loss": 1.2296, "step": 2491 }, { "epoch": 0.52, "learning_rate": 1.9407333443548806e-05, "loss": 1.326, "step": 2492 }, { "epoch": 0.52, "learning_rate": 1.9393713053075816e-05, "loss": 1.2577, "step": 2493 }, { "epoch": 0.52, "learning_rate": 1.9380092944044036e-05, "loss": 1.2903, "step": 2494 }, { "epoch": 0.52, "learning_rate": 1.9366473122776e-05, "loss": 1.2448, "step": 2495 }, { "epoch": 0.53, "learning_rate": 1.9352853595594077e-05, "loss": 1.2491, "step": 2496 }, { "epoch": 0.53, "learning_rate": 1.9339234368820533e-05, "loss": 1.279, "step": 2497 }, { "epoch": 0.53, "learning_rate": 1.932561544877746e-05, "loss": 1.2312, "step": 2498 }, { "epoch": 0.53, "learning_rate": 1.9311996841786825e-05, "loss": 1.265, "step": 2499 }, { "epoch": 0.53, "learning_rate": 1.9298378554170463e-05, "loss": 1.217, "step": 2500 }, { "epoch": 0.53, "learning_rate": 1.9284760592250037e-05, "loss": 1.2786, "step": 2501 }, { "epoch": 0.53, "learning_rate": 1.9271142962347058e-05, "loss": 1.273, "step": 2502 }, { "epoch": 0.53, "learning_rate": 1.925752567078291e-05, "loss": 1.2468, "step": 2503 }, { "epoch": 0.53, "learning_rate": 1.924390872387879e-05, "loss": 1.3012, "step": 2504 }, { "epoch": 0.53, "learning_rate": 1.923029212795576e-05, "loss": 1.2393, "step": 2505 }, { "epoch": 0.53, "learning_rate": 1.9216675889334704e-05, "loss": 1.3048, "step": 2506 }, { "epoch": 0.53, "learning_rate": 1.9203060014336334e-05, "loss": 1.2441, "step": 2507 }, { "epoch": 0.53, "learning_rate": 1.9189444509281216e-05, "loss": 1.2865, "step": 2508 }, { "epoch": 0.53, "learning_rate": 1.9175829380489727e-05, "loss": 1.2377, "step": 2509 }, { "epoch": 0.53, "learning_rate": 1.9162214634282055e-05, "loss": 1.2666, "step": 2510 }, { "epoch": 0.53, "learning_rate": 1.9148600276978254e-05, "loss": 1.2533, "step": 2511 }, { "epoch": 0.53, "learning_rate": 1.9134986314898156e-05, "loss": 1.2998, "step": 2512 }, { "epoch": 0.53, "learning_rate": 1.9121372754361437e-05, "loss": 1.2494, "step": 2513 }, { "epoch": 0.53, "learning_rate": 1.9107759601687562e-05, "loss": 1.2354, "step": 2514 }, { "epoch": 0.53, "learning_rate": 1.909414686319583e-05, "loss": 1.2535, "step": 2515 }, { "epoch": 0.53, "learning_rate": 1.9080534545205334e-05, "loss": 1.2787, "step": 2516 }, { "epoch": 0.53, "learning_rate": 1.9066922654034975e-05, "loss": 1.2648, "step": 2517 }, { "epoch": 0.53, "learning_rate": 1.9053311196003457e-05, "loss": 1.3116, "step": 2518 }, { "epoch": 0.53, "learning_rate": 1.9039700177429282e-05, "loss": 1.2336, "step": 2519 }, { "epoch": 0.53, "learning_rate": 1.9026089604630743e-05, "loss": 1.307, "step": 2520 }, { "epoch": 0.53, "learning_rate": 1.9012479483925942e-05, "loss": 1.2066, "step": 2521 }, { "epoch": 0.53, "learning_rate": 1.8998869821632757e-05, "loss": 1.2977, "step": 2522 }, { "epoch": 0.53, "learning_rate": 1.898526062406884e-05, "loss": 1.2949, "step": 2523 }, { "epoch": 0.53, "learning_rate": 1.8971651897551672e-05, "loss": 1.2905, "step": 2524 }, { "epoch": 0.53, "learning_rate": 1.8958043648398457e-05, "loss": 1.2528, "step": 2525 }, { "epoch": 0.53, "learning_rate": 1.8944435882926236e-05, "loss": 1.2868, "step": 2526 }, { "epoch": 0.53, "learning_rate": 1.893082860745178e-05, "loss": 1.1937, "step": 2527 }, { "epoch": 0.53, "learning_rate": 1.8917221828291652e-05, "loss": 1.2792, "step": 2528 }, { "epoch": 0.53, "learning_rate": 1.8903615551762182e-05, "loss": 1.2896, "step": 2529 }, { "epoch": 0.53, "learning_rate": 1.8890009784179476e-05, "loss": 1.2784, "step": 2530 }, { "epoch": 0.53, "learning_rate": 1.8876404531859376e-05, "loss": 1.3013, "step": 2531 }, { "epoch": 0.53, "learning_rate": 1.8862799801117523e-05, "loss": 1.2834, "step": 2532 }, { "epoch": 0.53, "learning_rate": 1.884919559826928e-05, "loss": 1.2875, "step": 2533 }, { "epoch": 0.53, "learning_rate": 1.8835591929629795e-05, "loss": 1.2709, "step": 2534 }, { "epoch": 0.53, "learning_rate": 1.882198880151395e-05, "loss": 1.227, "step": 2535 }, { "epoch": 0.53, "learning_rate": 1.8808386220236365e-05, "loss": 1.2695, "step": 2536 }, { "epoch": 0.53, "learning_rate": 1.8794784192111448e-05, "loss": 1.2871, "step": 2537 }, { "epoch": 0.53, "learning_rate": 1.8781182723453303e-05, "loss": 1.2799, "step": 2538 }, { "epoch": 0.53, "learning_rate": 1.8767581820575803e-05, "loss": 1.2406, "step": 2539 }, { "epoch": 0.53, "learning_rate": 1.875398148979255e-05, "loss": 1.2502, "step": 2540 }, { "epoch": 0.53, "learning_rate": 1.874038173741688e-05, "loss": 1.2613, "step": 2541 }, { "epoch": 0.53, "learning_rate": 1.8726782569761864e-05, "loss": 1.2224, "step": 2542 }, { "epoch": 0.53, "learning_rate": 1.87131839931403e-05, "loss": 1.2702, "step": 2543 }, { "epoch": 0.54, "learning_rate": 1.8699586013864694e-05, "loss": 1.2646, "step": 2544 }, { "epoch": 0.54, "learning_rate": 1.8685988638247316e-05, "loss": 1.2868, "step": 2545 }, { "epoch": 0.54, "learning_rate": 1.8672391872600108e-05, "loss": 1.1826, "step": 2546 }, { "epoch": 0.54, "learning_rate": 1.8658795723234774e-05, "loss": 1.2546, "step": 2547 }, { "epoch": 0.54, "learning_rate": 1.8645200196462698e-05, "loss": 1.2052, "step": 2548 }, { "epoch": 0.54, "learning_rate": 1.8631605298594977e-05, "loss": 1.2719, "step": 2549 }, { "epoch": 0.54, "learning_rate": 1.8618011035942444e-05, "loss": 1.2677, "step": 2550 }, { "epoch": 0.54, "learning_rate": 1.860441741481561e-05, "loss": 1.2691, "step": 2551 }, { "epoch": 0.54, "learning_rate": 1.8590824441524696e-05, "loss": 1.2376, "step": 2552 }, { "epoch": 0.54, "learning_rate": 1.8577232122379625e-05, "loss": 1.2478, "step": 2553 }, { "epoch": 0.54, "learning_rate": 1.8563640463690015e-05, "loss": 1.2427, "step": 2554 }, { "epoch": 0.54, "learning_rate": 1.8550049471765176e-05, "loss": 1.2358, "step": 2555 }, { "epoch": 0.54, "learning_rate": 1.853645915291412e-05, "loss": 1.3074, "step": 2556 }, { "epoch": 0.54, "learning_rate": 1.8522869513445515e-05, "loss": 1.3063, "step": 2557 }, { "epoch": 0.54, "learning_rate": 1.850928055966775e-05, "loss": 1.2549, "step": 2558 }, { "epoch": 0.54, "learning_rate": 1.8495692297888885e-05, "loss": 1.267, "step": 2559 }, { "epoch": 0.54, "learning_rate": 1.848210473441664e-05, "loss": 1.2571, "step": 2560 }, { "epoch": 0.54, "learning_rate": 1.8468517875558433e-05, "loss": 1.2976, "step": 2561 }, { "epoch": 0.54, "learning_rate": 1.845493172762134e-05, "loss": 1.3059, "step": 2562 }, { "epoch": 0.54, "learning_rate": 1.8441346296912128e-05, "loss": 1.2603, "step": 2563 }, { "epoch": 0.54, "learning_rate": 1.8427761589737203e-05, "loss": 1.2504, "step": 2564 }, { "epoch": 0.54, "learning_rate": 1.8414177612402657e-05, "loss": 1.3052, "step": 2565 }, { "epoch": 0.54, "learning_rate": 1.8400594371214234e-05, "loss": 1.2742, "step": 2566 }, { "epoch": 0.54, "learning_rate": 1.8387011872477338e-05, "loss": 1.2572, "step": 2567 }, { "epoch": 0.54, "learning_rate": 1.8373430122497022e-05, "loss": 1.2841, "step": 2568 }, { "epoch": 0.54, "learning_rate": 1.835984912757801e-05, "loss": 1.2668, "step": 2569 }, { "epoch": 0.54, "learning_rate": 1.8346268894024644e-05, "loss": 1.2975, "step": 2570 }, { "epoch": 0.54, "learning_rate": 1.8332689428140956e-05, "loss": 1.2527, "step": 2571 }, { "epoch": 0.54, "learning_rate": 1.831911073623058e-05, "loss": 1.2708, "step": 2572 }, { "epoch": 0.54, "learning_rate": 1.830553282459681e-05, "loss": 1.2842, "step": 2573 }, { "epoch": 0.54, "learning_rate": 1.8291955699542584e-05, "loss": 1.2696, "step": 2574 }, { "epoch": 0.54, "learning_rate": 1.8278379367370448e-05, "loss": 1.2579, "step": 2575 }, { "epoch": 0.54, "learning_rate": 1.8264803834382622e-05, "loss": 1.284, "step": 2576 }, { "epoch": 0.54, "learning_rate": 1.8251229106880916e-05, "loss": 1.2991, "step": 2577 }, { "epoch": 0.54, "learning_rate": 1.8237655191166785e-05, "loss": 1.289, "step": 2578 }, { "epoch": 0.54, "learning_rate": 1.8224082093541306e-05, "loss": 1.2414, "step": 2579 }, { "epoch": 0.54, "learning_rate": 1.8210509820305174e-05, "loss": 1.2379, "step": 2580 }, { "epoch": 0.54, "learning_rate": 1.8196938377758696e-05, "loss": 1.2553, "step": 2581 }, { "epoch": 0.54, "learning_rate": 1.818336777220181e-05, "loss": 1.2305, "step": 2582 }, { "epoch": 0.54, "learning_rate": 1.8169798009934038e-05, "loss": 1.2905, "step": 2583 }, { "epoch": 0.54, "learning_rate": 1.815622909725454e-05, "loss": 1.3296, "step": 2584 }, { "epoch": 0.54, "learning_rate": 1.8142661040462068e-05, "loss": 1.265, "step": 2585 }, { "epoch": 0.54, "learning_rate": 1.8129093845854965e-05, "loss": 1.2608, "step": 2586 }, { "epoch": 0.54, "learning_rate": 1.81155275197312e-05, "loss": 1.2616, "step": 2587 }, { "epoch": 0.54, "learning_rate": 1.8101962068388315e-05, "loss": 1.2136, "step": 2588 }, { "epoch": 0.54, "learning_rate": 1.808839749812346e-05, "loss": 1.2556, "step": 2589 }, { "epoch": 0.54, "learning_rate": 1.807483381523337e-05, "loss": 1.27, "step": 2590 }, { "epoch": 0.54, "learning_rate": 1.8061271026014364e-05, "loss": 1.2541, "step": 2591 }, { "epoch": 0.55, "learning_rate": 1.8047709136762368e-05, "loss": 1.2369, "step": 2592 }, { "epoch": 0.55, "learning_rate": 1.8034148153772864e-05, "loss": 1.2211, "step": 2593 }, { "epoch": 0.55, "learning_rate": 1.8020588083340912e-05, "loss": 1.3083, "step": 2594 }, { "epoch": 0.55, "learning_rate": 1.8007028931761184e-05, "loss": 1.2896, "step": 2595 }, { "epoch": 0.55, "learning_rate": 1.7993470705327877e-05, "loss": 1.2885, "step": 2596 }, { "epoch": 0.55, "learning_rate": 1.79799134103348e-05, "loss": 1.2292, "step": 2597 }, { "epoch": 0.55, "learning_rate": 1.7966357053075312e-05, "loss": 1.299, "step": 2598 }, { "epoch": 0.55, "learning_rate": 1.795280163984232e-05, "loss": 1.2675, "step": 2599 }, { "epoch": 0.55, "learning_rate": 1.7939247176928328e-05, "loss": 1.2745, "step": 2600 }, { "epoch": 0.55, "learning_rate": 1.792569367062537e-05, "loss": 1.2909, "step": 2601 }, { "epoch": 0.55, "learning_rate": 1.791214112722505e-05, "loss": 1.2501, "step": 2602 }, { "epoch": 0.55, "learning_rate": 1.7898589553018523e-05, "loss": 1.2755, "step": 2603 }, { "epoch": 0.55, "learning_rate": 1.788503895429649e-05, "loss": 1.2249, "step": 2604 }, { "epoch": 0.55, "learning_rate": 1.7871489337349208e-05, "loss": 1.2698, "step": 2605 }, { "epoch": 0.55, "learning_rate": 1.785794070846647e-05, "loss": 1.238, "step": 2606 }, { "epoch": 0.55, "learning_rate": 1.78443930739376e-05, "loss": 1.287, "step": 2607 }, { "epoch": 0.55, "learning_rate": 1.7830846440051493e-05, "loss": 1.2503, "step": 2608 }, { "epoch": 0.55, "learning_rate": 1.7817300813096548e-05, "loss": 1.2666, "step": 2609 }, { "epoch": 0.55, "learning_rate": 1.7803756199360704e-05, "loss": 1.2553, "step": 2610 }, { "epoch": 0.55, "learning_rate": 1.7790212605131448e-05, "loss": 1.2677, "step": 2611 }, { "epoch": 0.55, "learning_rate": 1.7776670036695758e-05, "loss": 1.3103, "step": 2612 }, { "epoch": 0.55, "learning_rate": 1.776312850034018e-05, "loss": 1.2804, "step": 2613 }, { "epoch": 0.55, "learning_rate": 1.7749588002350748e-05, "loss": 1.2936, "step": 2614 }, { "epoch": 0.55, "learning_rate": 1.7736048549013013e-05, "loss": 1.2828, "step": 2615 }, { "epoch": 0.55, "learning_rate": 1.7722510146612075e-05, "loss": 1.1981, "step": 2616 }, { "epoch": 0.55, "learning_rate": 1.770897280143251e-05, "loss": 1.218, "step": 2617 }, { "epoch": 0.55, "learning_rate": 1.7695436519758412e-05, "loss": 1.2663, "step": 2618 }, { "epoch": 0.55, "learning_rate": 1.76819013078734e-05, "loss": 1.2229, "step": 2619 }, { "epoch": 0.55, "learning_rate": 1.7668367172060562e-05, "loss": 1.284, "step": 2620 }, { "epoch": 0.55, "learning_rate": 1.765483411860253e-05, "loss": 1.308, "step": 2621 }, { "epoch": 0.55, "learning_rate": 1.7641302153781402e-05, "loss": 1.2825, "step": 2622 }, { "epoch": 0.55, "learning_rate": 1.7627771283878764e-05, "loss": 1.2576, "step": 2623 }, { "epoch": 0.55, "learning_rate": 1.761424151517573e-05, "loss": 1.3091, "step": 2624 }, { "epoch": 0.55, "learning_rate": 1.7600712853952863e-05, "loss": 1.2467, "step": 2625 }, { "epoch": 0.55, "learning_rate": 1.7587185306490245e-05, "loss": 1.2428, "step": 2626 }, { "epoch": 0.55, "learning_rate": 1.7573658879067424e-05, "loss": 1.2758, "step": 2627 }, { "epoch": 0.55, "learning_rate": 1.7560133577963423e-05, "loss": 1.2585, "step": 2628 }, { "epoch": 0.55, "learning_rate": 1.754660940945676e-05, "loss": 1.2334, "step": 2629 }, { "epoch": 0.55, "learning_rate": 1.753308637982541e-05, "loss": 1.222, "step": 2630 }, { "epoch": 0.55, "learning_rate": 1.751956449534682e-05, "loss": 1.2623, "step": 2631 }, { "epoch": 0.55, "learning_rate": 1.7506043762297932e-05, "loss": 1.32, "step": 2632 }, { "epoch": 0.55, "learning_rate": 1.7492524186955108e-05, "loss": 1.2514, "step": 2633 }, { "epoch": 0.55, "learning_rate": 1.7479005775594216e-05, "loss": 1.3031, "step": 2634 }, { "epoch": 0.55, "learning_rate": 1.746548853449056e-05, "loss": 1.2556, "step": 2635 }, { "epoch": 0.55, "learning_rate": 1.74519724699189e-05, "loss": 1.2561, "step": 2636 }, { "epoch": 0.55, "learning_rate": 1.7438457588153466e-05, "loss": 1.1857, "step": 2637 }, { "epoch": 0.55, "learning_rate": 1.742494389546792e-05, "loss": 1.2086, "step": 2638 }, { "epoch": 0.56, "learning_rate": 1.7411431398135384e-05, "loss": 1.2687, "step": 2639 }, { "epoch": 0.56, "learning_rate": 1.739792010242843e-05, "loss": 1.2376, "step": 2640 }, { "epoch": 0.56, "learning_rate": 1.738441001461905e-05, "loss": 1.3122, "step": 2641 }, { "epoch": 0.56, "learning_rate": 1.7370901140978706e-05, "loss": 1.2921, "step": 2642 }, { "epoch": 0.56, "learning_rate": 1.735739348777827e-05, "loss": 1.2603, "step": 2643 }, { "epoch": 0.56, "learning_rate": 1.734388706128805e-05, "loss": 1.319, "step": 2644 }, { "epoch": 0.56, "learning_rate": 1.7330381867777808e-05, "loss": 1.2287, "step": 2645 }, { "epoch": 0.56, "learning_rate": 1.731687791351671e-05, "loss": 1.2493, "step": 2646 }, { "epoch": 0.56, "learning_rate": 1.730337520477335e-05, "loss": 1.2435, "step": 2647 }, { "epoch": 0.56, "learning_rate": 1.7289873747815755e-05, "loss": 1.2438, "step": 2648 }, { "epoch": 0.56, "learning_rate": 1.7276373548911355e-05, "loss": 1.2156, "step": 2649 }, { "epoch": 0.56, "learning_rate": 1.7262874614327016e-05, "loss": 1.2229, "step": 2650 }, { "epoch": 0.56, "learning_rate": 1.7249376950329004e-05, "loss": 1.22, "step": 2651 }, { "epoch": 0.56, "learning_rate": 1.7235880563182988e-05, "loss": 1.1856, "step": 2652 }, { "epoch": 0.56, "learning_rate": 1.7222385459154072e-05, "loss": 1.3036, "step": 2653 }, { "epoch": 0.56, "learning_rate": 1.720889164450672e-05, "loss": 1.2084, "step": 2654 }, { "epoch": 0.56, "learning_rate": 1.7195399125504853e-05, "loss": 1.2874, "step": 2655 }, { "epoch": 0.56, "learning_rate": 1.7181907908411744e-05, "loss": 1.2356, "step": 2656 }, { "epoch": 0.56, "learning_rate": 1.716841799949007e-05, "loss": 1.2575, "step": 2657 }, { "epoch": 0.56, "learning_rate": 1.7154929405001936e-05, "loss": 1.2283, "step": 2658 }, { "epoch": 0.56, "learning_rate": 1.7141442131208788e-05, "loss": 1.2843, "step": 2659 }, { "epoch": 0.56, "learning_rate": 1.712795618437148e-05, "loss": 1.2767, "step": 2660 }, { "epoch": 0.56, "learning_rate": 1.7114471570750266e-05, "loss": 1.2471, "step": 2661 }, { "epoch": 0.56, "learning_rate": 1.7100988296604756e-05, "loss": 1.2411, "step": 2662 }, { "epoch": 0.56, "learning_rate": 1.708750636819395e-05, "loss": 1.2241, "step": 2663 }, { "epoch": 0.56, "learning_rate": 1.7074025791776232e-05, "loss": 1.2435, "step": 2664 }, { "epoch": 0.56, "learning_rate": 1.706054657360933e-05, "loss": 1.2949, "step": 2665 }, { "epoch": 0.56, "learning_rate": 1.704706871995038e-05, "loss": 1.2506, "step": 2666 }, { "epoch": 0.56, "learning_rate": 1.703359223705585e-05, "loss": 1.2397, "step": 2667 }, { "epoch": 0.56, "learning_rate": 1.7020117131181585e-05, "loss": 1.3231, "step": 2668 }, { "epoch": 0.56, "learning_rate": 1.70066434085828e-05, "loss": 1.2777, "step": 2669 }, { "epoch": 0.56, "learning_rate": 1.6993171075514054e-05, "loss": 1.242, "step": 2670 }, { "epoch": 0.56, "learning_rate": 1.697970013822927e-05, "loss": 1.2717, "step": 2671 }, { "epoch": 0.56, "learning_rate": 1.6966230602981727e-05, "loss": 1.2544, "step": 2672 }, { "epoch": 0.56, "learning_rate": 1.6952762476024023e-05, "loss": 1.2066, "step": 2673 }, { "epoch": 0.56, "learning_rate": 1.6939295763608146e-05, "loss": 1.1992, "step": 2674 }, { "epoch": 0.56, "learning_rate": 1.6925830471985398e-05, "loss": 1.2805, "step": 2675 }, { "epoch": 0.56, "learning_rate": 1.6912366607406433e-05, "loss": 1.2913, "step": 2676 }, { "epoch": 0.56, "learning_rate": 1.6898904176121246e-05, "loss": 1.234, "step": 2677 }, { "epoch": 0.56, "learning_rate": 1.688544318437914e-05, "loss": 1.309, "step": 2678 }, { "epoch": 0.56, "learning_rate": 1.6871983638428794e-05, "loss": 1.3139, "step": 2679 }, { "epoch": 0.56, "learning_rate": 1.685852554451818e-05, "loss": 1.2674, "step": 2680 }, { "epoch": 0.56, "learning_rate": 1.6845068908894597e-05, "loss": 1.2524, "step": 2681 }, { "epoch": 0.56, "learning_rate": 1.68316137378047e-05, "loss": 1.2966, "step": 2682 }, { "epoch": 0.56, "learning_rate": 1.681816003749442e-05, "loss": 1.2549, "step": 2683 }, { "epoch": 0.56, "learning_rate": 1.6804707814209046e-05, "loss": 1.2338, "step": 2684 }, { "epoch": 0.56, "learning_rate": 1.6791257074193156e-05, "loss": 1.304, "step": 2685 }, { "epoch": 0.56, "learning_rate": 1.677780782369064e-05, "loss": 1.274, "step": 2686 }, { "epoch": 0.57, "learning_rate": 1.6764360068944706e-05, "loss": 1.2277, "step": 2687 }, { "epoch": 0.57, "learning_rate": 1.6750913816197873e-05, "loss": 1.3203, "step": 2688 }, { "epoch": 0.57, "learning_rate": 1.6737469071691936e-05, "loss": 1.2951, "step": 2689 }, { "epoch": 0.57, "learning_rate": 1.6724025841668026e-05, "loss": 1.2688, "step": 2690 }, { "epoch": 0.57, "learning_rate": 1.6710584132366542e-05, "loss": 1.288, "step": 2691 }, { "epoch": 0.57, "learning_rate": 1.6697143950027194e-05, "loss": 1.2346, "step": 2692 }, { "epoch": 0.57, "learning_rate": 1.6683705300888977e-05, "loss": 1.2722, "step": 2693 }, { "epoch": 0.57, "learning_rate": 1.667026819119016e-05, "loss": 1.3016, "step": 2694 }, { "epoch": 0.57, "learning_rate": 1.6656832627168338e-05, "loss": 1.3038, "step": 2695 }, { "epoch": 0.57, "learning_rate": 1.6643398615060346e-05, "loss": 1.268, "step": 2696 }, { "epoch": 0.57, "learning_rate": 1.6629966161102304e-05, "loss": 1.2274, "step": 2697 }, { "epoch": 0.57, "learning_rate": 1.661653527152964e-05, "loss": 1.2902, "step": 2698 }, { "epoch": 0.57, "learning_rate": 1.6603105952577024e-05, "loss": 1.2847, "step": 2699 }, { "epoch": 0.57, "learning_rate": 1.6589678210478415e-05, "loss": 1.3047, "step": 2700 }, { "epoch": 0.57, "learning_rate": 1.657625205146703e-05, "loss": 1.2448, "step": 2701 }, { "epoch": 0.57, "learning_rate": 1.6562827481775353e-05, "loss": 1.285, "step": 2702 }, { "epoch": 0.57, "learning_rate": 1.6549404507635135e-05, "loss": 1.2517, "step": 2703 }, { "epoch": 0.57, "learning_rate": 1.6535983135277378e-05, "loss": 1.316, "step": 2704 }, { "epoch": 0.57, "learning_rate": 1.6522563370932355e-05, "loss": 1.2458, "step": 2705 }, { "epoch": 0.57, "learning_rate": 1.6509145220829574e-05, "loss": 1.2566, "step": 2706 }, { "epoch": 0.57, "learning_rate": 1.64957286911978e-05, "loss": 1.2483, "step": 2707 }, { "epoch": 0.57, "learning_rate": 1.6482313788265058e-05, "loss": 1.2282, "step": 2708 }, { "epoch": 0.57, "learning_rate": 1.64689005182586e-05, "loss": 1.2725, "step": 2709 }, { "epoch": 0.57, "learning_rate": 1.6455488887404935e-05, "loss": 1.2895, "step": 2710 }, { "epoch": 0.57, "learning_rate": 1.6442078901929803e-05, "loss": 1.2979, "step": 2711 }, { "epoch": 0.57, "learning_rate": 1.6428670568058176e-05, "loss": 1.2606, "step": 2712 }, { "epoch": 0.57, "learning_rate": 1.641526389201427e-05, "loss": 1.2355, "step": 2713 }, { "epoch": 0.57, "learning_rate": 1.640185888002153e-05, "loss": 1.285, "step": 2714 }, { "epoch": 0.57, "learning_rate": 1.6388455538302612e-05, "loss": 1.2582, "step": 2715 }, { "epoch": 0.57, "learning_rate": 1.6375053873079424e-05, "loss": 1.2936, "step": 2716 }, { "epoch": 0.57, "learning_rate": 1.6361653890573078e-05, "loss": 1.2875, "step": 2717 }, { "epoch": 0.57, "learning_rate": 1.6348255597003896e-05, "loss": 1.2882, "step": 2718 }, { "epoch": 0.57, "learning_rate": 1.633485899859144e-05, "loss": 1.2174, "step": 2719 }, { "epoch": 0.57, "learning_rate": 1.632146410155447e-05, "loss": 1.1721, "step": 2720 }, { "epoch": 0.57, "learning_rate": 1.6308070912110965e-05, "loss": 1.2582, "step": 2721 }, { "epoch": 0.57, "learning_rate": 1.6294679436478095e-05, "loss": 1.2698, "step": 2722 }, { "epoch": 0.57, "learning_rate": 1.6281289680872252e-05, "loss": 1.2157, "step": 2723 }, { "epoch": 0.57, "learning_rate": 1.6267901651509022e-05, "loss": 1.2678, "step": 2724 }, { "epoch": 0.57, "learning_rate": 1.6254515354603194e-05, "loss": 1.2412, "step": 2725 }, { "epoch": 0.57, "learning_rate": 1.6241130796368737e-05, "loss": 1.2696, "step": 2726 }, { "epoch": 0.57, "learning_rate": 1.6227747983018845e-05, "loss": 1.2756, "step": 2727 }, { "epoch": 0.57, "learning_rate": 1.6214366920765856e-05, "loss": 1.2865, "step": 2728 }, { "epoch": 0.57, "learning_rate": 1.620098761582135e-05, "loss": 1.2554, "step": 2729 }, { "epoch": 0.57, "learning_rate": 1.6187610074396044e-05, "loss": 1.2372, "step": 2730 }, { "epoch": 0.57, "learning_rate": 1.6174234302699856e-05, "loss": 1.291, "step": 2731 }, { "epoch": 0.57, "learning_rate": 1.616086030694189e-05, "loss": 1.2583, "step": 2732 }, { "epoch": 0.57, "learning_rate": 1.6147488093330405e-05, "loss": 1.2837, "step": 2733 }, { "epoch": 0.58, "learning_rate": 1.6134117668072858e-05, "loss": 1.2748, "step": 2734 }, { "epoch": 0.58, "learning_rate": 1.612074903737585e-05, "loss": 1.2685, "step": 2735 }, { "epoch": 0.58, "learning_rate": 1.610738220744517e-05, "loss": 1.2561, "step": 2736 }, { "epoch": 0.58, "learning_rate": 1.6094017184485763e-05, "loss": 1.2569, "step": 2737 }, { "epoch": 0.58, "learning_rate": 1.6080653974701732e-05, "loss": 1.2776, "step": 2738 }, { "epoch": 0.58, "learning_rate": 1.6067292584296333e-05, "loss": 1.2768, "step": 2739 }, { "epoch": 0.58, "learning_rate": 1.6053933019472003e-05, "loss": 1.2387, "step": 2740 }, { "epoch": 0.58, "learning_rate": 1.6040575286430295e-05, "loss": 1.2502, "step": 2741 }, { "epoch": 0.58, "learning_rate": 1.602721939137195e-05, "loss": 1.2123, "step": 2742 }, { "epoch": 0.58, "learning_rate": 1.6013865340496826e-05, "loss": 1.2785, "step": 2743 }, { "epoch": 0.58, "learning_rate": 1.6000513140003927e-05, "loss": 1.2757, "step": 2744 }, { "epoch": 0.58, "learning_rate": 1.5987162796091428e-05, "loss": 1.2975, "step": 2745 }, { "epoch": 0.58, "learning_rate": 1.5973814314956602e-05, "loss": 1.2783, "step": 2746 }, { "epoch": 0.58, "learning_rate": 1.596046770279588e-05, "loss": 1.2931, "step": 2747 }, { "epoch": 0.58, "learning_rate": 1.5947122965804827e-05, "loss": 1.3106, "step": 2748 }, { "epoch": 0.58, "learning_rate": 1.5933780110178128e-05, "loss": 1.2796, "step": 2749 }, { "epoch": 0.58, "learning_rate": 1.59204391421096e-05, "loss": 1.2381, "step": 2750 }, { "epoch": 0.58, "learning_rate": 1.5907100067792186e-05, "loss": 1.2646, "step": 2751 }, { "epoch": 0.58, "learning_rate": 1.589376289341793e-05, "loss": 1.266, "step": 2752 }, { "epoch": 0.58, "learning_rate": 1.5880427625178035e-05, "loss": 1.2513, "step": 2753 }, { "epoch": 0.58, "learning_rate": 1.586709426926277e-05, "loss": 1.2819, "step": 2754 }, { "epoch": 0.58, "learning_rate": 1.5853762831861567e-05, "loss": 1.2558, "step": 2755 }, { "epoch": 0.58, "learning_rate": 1.5840433319162925e-05, "loss": 1.2937, "step": 2756 }, { "epoch": 0.58, "learning_rate": 1.5827105737354456e-05, "loss": 1.2824, "step": 2757 }, { "epoch": 0.58, "learning_rate": 1.5813780092622907e-05, "loss": 1.261, "step": 2758 }, { "epoch": 0.58, "learning_rate": 1.580045639115409e-05, "loss": 1.2662, "step": 2759 }, { "epoch": 0.58, "learning_rate": 1.5787134639132935e-05, "loss": 1.3024, "step": 2760 }, { "epoch": 0.58, "learning_rate": 1.577381484274346e-05, "loss": 1.2585, "step": 2761 }, { "epoch": 0.58, "learning_rate": 1.576049700816877e-05, "loss": 1.2769, "step": 2762 }, { "epoch": 0.58, "learning_rate": 1.574718114159108e-05, "loss": 1.2846, "step": 2763 }, { "epoch": 0.58, "learning_rate": 1.5733867249191667e-05, "loss": 1.2085, "step": 2764 }, { "epoch": 0.58, "learning_rate": 1.57205553371509e-05, "loss": 1.2509, "step": 2765 }, { "epoch": 0.58, "learning_rate": 1.570724541164824e-05, "loss": 1.2795, "step": 2766 }, { "epoch": 0.58, "learning_rate": 1.569393747886221e-05, "loss": 1.2655, "step": 2767 }, { "epoch": 0.58, "learning_rate": 1.5680631544970405e-05, "loss": 1.2706, "step": 2768 }, { "epoch": 0.58, "learning_rate": 1.5667327616149522e-05, "loss": 1.2338, "step": 2769 }, { "epoch": 0.58, "learning_rate": 1.5654025698575286e-05, "loss": 1.3053, "step": 2770 }, { "epoch": 0.58, "learning_rate": 1.5640725798422525e-05, "loss": 1.2229, "step": 2771 }, { "epoch": 0.58, "learning_rate": 1.5627427921865106e-05, "loss": 1.2047, "step": 2772 }, { "epoch": 0.58, "learning_rate": 1.5614132075075967e-05, "loss": 1.2788, "step": 2773 }, { "epoch": 0.58, "learning_rate": 1.5600838264227102e-05, "loss": 1.2995, "step": 2774 }, { "epoch": 0.58, "learning_rate": 1.5587546495489563e-05, "loss": 1.2613, "step": 2775 }, { "epoch": 0.58, "learning_rate": 1.557425677503344e-05, "loss": 1.2044, "step": 2776 }, { "epoch": 0.58, "learning_rate": 1.5560969109027896e-05, "loss": 1.2386, "step": 2777 }, { "epoch": 0.58, "learning_rate": 1.5547683503641115e-05, "loss": 1.2889, "step": 2778 }, { "epoch": 0.58, "learning_rate": 1.5534399965040353e-05, "loss": 1.2513, "step": 2779 }, { "epoch": 0.58, "learning_rate": 1.552111849939188e-05, "loss": 1.2381, "step": 2780 }, { "epoch": 0.58, "learning_rate": 1.550783911286101e-05, "loss": 1.2871, "step": 2781 }, { "epoch": 0.59, "learning_rate": 1.5494561811612102e-05, "loss": 1.3133, "step": 2782 }, { "epoch": 0.59, "learning_rate": 1.548128660180854e-05, "loss": 1.2307, "step": 2783 }, { "epoch": 0.59, "learning_rate": 1.5468013489612742e-05, "loss": 1.2312, "step": 2784 }, { "epoch": 0.59, "learning_rate": 1.5454742481186137e-05, "loss": 1.3291, "step": 2785 }, { "epoch": 0.59, "learning_rate": 1.5441473582689198e-05, "loss": 1.2897, "step": 2786 }, { "epoch": 0.59, "learning_rate": 1.5428206800281413e-05, "loss": 1.2645, "step": 2787 }, { "epoch": 0.59, "learning_rate": 1.5414942140121278e-05, "loss": 1.2757, "step": 2788 }, { "epoch": 0.59, "learning_rate": 1.54016796083663e-05, "loss": 1.2955, "step": 2789 }, { "epoch": 0.59, "learning_rate": 1.538841921117303e-05, "loss": 1.2767, "step": 2790 }, { "epoch": 0.59, "learning_rate": 1.5375160954696986e-05, "loss": 1.219, "step": 2791 }, { "epoch": 0.59, "learning_rate": 1.536190484509273e-05, "loss": 1.2828, "step": 2792 }, { "epoch": 0.59, "learning_rate": 1.5348650888513798e-05, "loss": 1.1934, "step": 2793 }, { "epoch": 0.59, "learning_rate": 1.533539909111273e-05, "loss": 1.2265, "step": 2794 }, { "epoch": 0.59, "learning_rate": 1.5322149459041097e-05, "loss": 1.2764, "step": 2795 }, { "epoch": 0.59, "learning_rate": 1.5308901998449415e-05, "loss": 1.232, "step": 2796 }, { "epoch": 0.59, "learning_rate": 1.5295656715487226e-05, "loss": 1.2899, "step": 2797 }, { "epoch": 0.59, "learning_rate": 1.5282413616303063e-05, "loss": 1.3017, "step": 2798 }, { "epoch": 0.59, "learning_rate": 1.526917270704441e-05, "loss": 1.2194, "step": 2799 }, { "epoch": 0.59, "learning_rate": 1.5255933993857785e-05, "loss": 1.2998, "step": 2800 }, { "epoch": 0.59, "learning_rate": 1.5242697482888649e-05, "loss": 1.2646, "step": 2801 }, { "epoch": 0.59, "learning_rate": 1.5229463180281441e-05, "loss": 1.2445, "step": 2802 }, { "epoch": 0.59, "learning_rate": 1.5216231092179604e-05, "loss": 1.2482, "step": 2803 }, { "epoch": 0.59, "learning_rate": 1.5203001224725525e-05, "loss": 1.2311, "step": 2804 }, { "epoch": 0.59, "learning_rate": 1.5189773584060563e-05, "loss": 1.2904, "step": 2805 }, { "epoch": 0.59, "learning_rate": 1.517654817632507e-05, "loss": 1.2851, "step": 2806 }, { "epoch": 0.59, "learning_rate": 1.5163325007658319e-05, "loss": 1.2917, "step": 2807 }, { "epoch": 0.59, "learning_rate": 1.5150104084198587e-05, "loss": 1.2033, "step": 2808 }, { "epoch": 0.59, "learning_rate": 1.5136885412083073e-05, "loss": 1.255, "step": 2809 }, { "epoch": 0.59, "learning_rate": 1.5123668997447948e-05, "loss": 1.2871, "step": 2810 }, { "epoch": 0.59, "learning_rate": 1.5110454846428348e-05, "loss": 1.2719, "step": 2811 }, { "epoch": 0.59, "learning_rate": 1.5097242965158322e-05, "loss": 1.246, "step": 2812 }, { "epoch": 0.59, "learning_rate": 1.5084033359770907e-05, "loss": 1.2591, "step": 2813 }, { "epoch": 0.59, "learning_rate": 1.5070826036398052e-05, "loss": 1.2777, "step": 2814 }, { "epoch": 0.59, "learning_rate": 1.505762100117065e-05, "loss": 1.1695, "step": 2815 }, { "epoch": 0.59, "learning_rate": 1.5044418260218559e-05, "loss": 1.3016, "step": 2816 }, { "epoch": 0.59, "learning_rate": 1.503121781967054e-05, "loss": 1.3046, "step": 2817 }, { "epoch": 0.59, "learning_rate": 1.5018019685654295e-05, "loss": 1.2725, "step": 2818 }, { "epoch": 0.59, "learning_rate": 1.5004823864296472e-05, "loss": 1.2431, "step": 2819 }, { "epoch": 0.59, "learning_rate": 1.4991630361722619e-05, "loss": 1.2256, "step": 2820 }, { "epoch": 0.59, "learning_rate": 1.4978439184057233e-05, "loss": 1.202, "step": 2821 }, { "epoch": 0.59, "learning_rate": 1.4965250337423718e-05, "loss": 1.2295, "step": 2822 }, { "epoch": 0.59, "learning_rate": 1.4952063827944385e-05, "loss": 1.283, "step": 2823 }, { "epoch": 0.59, "learning_rate": 1.4938879661740495e-05, "loss": 1.2384, "step": 2824 }, { "epoch": 0.59, "learning_rate": 1.4925697844932185e-05, "loss": 1.2484, "step": 2825 }, { "epoch": 0.59, "learning_rate": 1.4912518383638512e-05, "loss": 1.2424, "step": 2826 }, { "epoch": 0.59, "learning_rate": 1.4899341283977457e-05, "loss": 1.2382, "step": 2827 }, { "epoch": 0.59, "learning_rate": 1.4886166552065873e-05, "loss": 1.2815, "step": 2828 }, { "epoch": 0.6, "learning_rate": 1.4872994194019553e-05, "loss": 1.2739, "step": 2829 }, { "epoch": 0.6, "learning_rate": 1.4859824215953154e-05, "loss": 1.2352, "step": 2830 }, { "epoch": 0.6, "learning_rate": 1.4846656623980234e-05, "loss": 1.211, "step": 2831 }, { "epoch": 0.6, "learning_rate": 1.4833491424213268e-05, "loss": 1.1948, "step": 2832 }, { "epoch": 0.6, "learning_rate": 1.4820328622763584e-05, "loss": 1.2689, "step": 2833 }, { "epoch": 0.6, "learning_rate": 1.4807168225741433e-05, "loss": 1.2698, "step": 2834 }, { "epoch": 0.6, "learning_rate": 1.4794010239255925e-05, "loss": 1.2735, "step": 2835 }, { "epoch": 0.6, "learning_rate": 1.4780854669415053e-05, "loss": 1.2473, "step": 2836 }, { "epoch": 0.6, "learning_rate": 1.4767701522325708e-05, "loss": 1.3154, "step": 2837 }, { "epoch": 0.6, "learning_rate": 1.4754550804093633e-05, "loss": 1.228, "step": 2838 }, { "epoch": 0.6, "learning_rate": 1.4741402520823442e-05, "loss": 1.2206, "step": 2839 }, { "epoch": 0.6, "learning_rate": 1.4728256678618652e-05, "loss": 1.2807, "step": 2840 }, { "epoch": 0.6, "learning_rate": 1.47151132835816e-05, "loss": 1.2791, "step": 2841 }, { "epoch": 0.6, "learning_rate": 1.4701972341813533e-05, "loss": 1.23, "step": 2842 }, { "epoch": 0.6, "learning_rate": 1.4688833859414529e-05, "loss": 1.2019, "step": 2843 }, { "epoch": 0.6, "learning_rate": 1.467569784248352e-05, "loss": 1.263, "step": 2844 }, { "epoch": 0.6, "learning_rate": 1.4662564297118325e-05, "loss": 1.2614, "step": 2845 }, { "epoch": 0.6, "learning_rate": 1.4649433229415588e-05, "loss": 1.2007, "step": 2846 }, { "epoch": 0.6, "learning_rate": 1.4636304645470807e-05, "loss": 1.2403, "step": 2847 }, { "epoch": 0.6, "learning_rate": 1.4623178551378346e-05, "loss": 1.2279, "step": 2848 }, { "epoch": 0.6, "learning_rate": 1.4610054953231379e-05, "loss": 1.2979, "step": 2849 }, { "epoch": 0.6, "learning_rate": 1.4596933857121963e-05, "loss": 1.2706, "step": 2850 }, { "epoch": 0.6, "learning_rate": 1.4583815269140957e-05, "loss": 1.2874, "step": 2851 }, { "epoch": 0.6, "learning_rate": 1.4570699195378071e-05, "loss": 1.2471, "step": 2852 }, { "epoch": 0.6, "learning_rate": 1.4557585641921859e-05, "loss": 1.2704, "step": 2853 }, { "epoch": 0.6, "learning_rate": 1.4544474614859683e-05, "loss": 1.2611, "step": 2854 }, { "epoch": 0.6, "learning_rate": 1.4531366120277736e-05, "loss": 1.2439, "step": 2855 }, { "epoch": 0.6, "learning_rate": 1.4518260164261058e-05, "loss": 1.273, "step": 2856 }, { "epoch": 0.6, "learning_rate": 1.4505156752893488e-05, "loss": 1.2756, "step": 2857 }, { "epoch": 0.6, "learning_rate": 1.4492055892257688e-05, "loss": 1.2911, "step": 2858 }, { "epoch": 0.6, "learning_rate": 1.4478957588435148e-05, "loss": 1.2161, "step": 2859 }, { "epoch": 0.6, "learning_rate": 1.4465861847506142e-05, "loss": 1.2703, "step": 2860 }, { "epoch": 0.6, "learning_rate": 1.4452768675549798e-05, "loss": 1.2377, "step": 2861 }, { "epoch": 0.6, "learning_rate": 1.4439678078644004e-05, "loss": 1.2483, "step": 2862 }, { "epoch": 0.6, "learning_rate": 1.4426590062865497e-05, "loss": 1.242, "step": 2863 }, { "epoch": 0.6, "learning_rate": 1.4413504634289785e-05, "loss": 1.2969, "step": 2864 }, { "epoch": 0.6, "learning_rate": 1.4400421798991178e-05, "loss": 1.2291, "step": 2865 }, { "epoch": 0.6, "learning_rate": 1.4387341563042801e-05, "loss": 1.2642, "step": 2866 }, { "epoch": 0.6, "learning_rate": 1.4374263932516557e-05, "loss": 1.3157, "step": 2867 }, { "epoch": 0.6, "learning_rate": 1.4361188913483132e-05, "loss": 1.2582, "step": 2868 }, { "epoch": 0.6, "learning_rate": 1.4348116512012024e-05, "loss": 1.2395, "step": 2869 }, { "epoch": 0.6, "learning_rate": 1.4335046734171499e-05, "loss": 1.2779, "step": 2870 }, { "epoch": 0.6, "learning_rate": 1.4321979586028607e-05, "loss": 1.2751, "step": 2871 }, { "epoch": 0.6, "learning_rate": 1.4308915073649182e-05, "loss": 1.2613, "step": 2872 }, { "epoch": 0.6, "learning_rate": 1.4295853203097823e-05, "loss": 1.2756, "step": 2873 }, { "epoch": 0.6, "learning_rate": 1.4282793980437923e-05, "loss": 1.2659, "step": 2874 }, { "epoch": 0.6, "learning_rate": 1.4269737411731627e-05, "loss": 1.3003, "step": 2875 }, { "epoch": 0.6, "learning_rate": 1.425668350303985e-05, "loss": 1.237, "step": 2876 }, { "epoch": 0.61, "learning_rate": 1.4243632260422292e-05, "loss": 1.2457, "step": 2877 }, { "epoch": 0.61, "learning_rate": 1.4230583689937381e-05, "loss": 1.2737, "step": 2878 }, { "epoch": 0.61, "learning_rate": 1.4217537797642343e-05, "loss": 1.2034, "step": 2879 }, { "epoch": 0.61, "learning_rate": 1.4204494589593127e-05, "loss": 1.2646, "step": 2880 }, { "epoch": 0.61, "learning_rate": 1.4191454071844457e-05, "loss": 1.2501, "step": 2881 }, { "epoch": 0.61, "learning_rate": 1.4178416250449804e-05, "loss": 1.2616, "step": 2882 }, { "epoch": 0.61, "learning_rate": 1.4165381131461388e-05, "loss": 1.2314, "step": 2883 }, { "epoch": 0.61, "learning_rate": 1.4152348720930156e-05, "loss": 1.2869, "step": 2884 }, { "epoch": 0.61, "learning_rate": 1.4139319024905836e-05, "loss": 1.2607, "step": 2885 }, { "epoch": 0.61, "learning_rate": 1.412629204943685e-05, "loss": 1.2817, "step": 2886 }, { "epoch": 0.61, "learning_rate": 1.4113267800570402e-05, "loss": 1.2456, "step": 2887 }, { "epoch": 0.61, "learning_rate": 1.41002462843524e-05, "loss": 1.2291, "step": 2888 }, { "epoch": 0.61, "learning_rate": 1.4087227506827482e-05, "loss": 1.2301, "step": 2889 }, { "epoch": 0.61, "learning_rate": 1.4074211474039046e-05, "loss": 1.2349, "step": 2890 }, { "epoch": 0.61, "learning_rate": 1.406119819202917e-05, "loss": 1.2626, "step": 2891 }, { "epoch": 0.61, "learning_rate": 1.4048187666838707e-05, "loss": 1.2817, "step": 2892 }, { "epoch": 0.61, "learning_rate": 1.4035179904507184e-05, "loss": 1.3066, "step": 2893 }, { "epoch": 0.61, "learning_rate": 1.4022174911072868e-05, "loss": 1.2531, "step": 2894 }, { "epoch": 0.61, "learning_rate": 1.4009172692572743e-05, "loss": 1.2722, "step": 2895 }, { "epoch": 0.61, "learning_rate": 1.39961732550425e-05, "loss": 1.2629, "step": 2896 }, { "epoch": 0.61, "learning_rate": 1.3983176604516526e-05, "loss": 1.2283, "step": 2897 }, { "epoch": 0.61, "learning_rate": 1.3970182747027944e-05, "loss": 1.3009, "step": 2898 }, { "epoch": 0.61, "learning_rate": 1.3957191688608544e-05, "loss": 1.2429, "step": 2899 }, { "epoch": 0.61, "learning_rate": 1.3944203435288857e-05, "loss": 1.2579, "step": 2900 }, { "epoch": 0.61, "learning_rate": 1.3931217993098076e-05, "loss": 1.2907, "step": 2901 }, { "epoch": 0.61, "learning_rate": 1.3918235368064102e-05, "loss": 1.2069, "step": 2902 }, { "epoch": 0.61, "learning_rate": 1.3905255566213542e-05, "loss": 1.2855, "step": 2903 }, { "epoch": 0.61, "learning_rate": 1.3892278593571669e-05, "loss": 1.2529, "step": 2904 }, { "epoch": 0.61, "learning_rate": 1.3879304456162457e-05, "loss": 1.215, "step": 2905 }, { "epoch": 0.61, "learning_rate": 1.3866333160008562e-05, "loss": 1.2576, "step": 2906 }, { "epoch": 0.61, "learning_rate": 1.3853364711131324e-05, "loss": 1.1791, "step": 2907 }, { "epoch": 0.61, "learning_rate": 1.3840399115550748e-05, "loss": 1.2156, "step": 2908 }, { "epoch": 0.61, "learning_rate": 1.3827436379285537e-05, "loss": 1.2298, "step": 2909 }, { "epoch": 0.61, "learning_rate": 1.3814476508353036e-05, "loss": 1.2701, "step": 2910 }, { "epoch": 0.61, "learning_rate": 1.3801519508769295e-05, "loss": 1.253, "step": 2911 }, { "epoch": 0.61, "learning_rate": 1.3788565386548996e-05, "loss": 1.2261, "step": 2912 }, { "epoch": 0.61, "learning_rate": 1.3775614147705521e-05, "loss": 1.2228, "step": 2913 }, { "epoch": 0.61, "learning_rate": 1.3762665798250887e-05, "loss": 1.263, "step": 2914 }, { "epoch": 0.61, "learning_rate": 1.3749720344195768e-05, "loss": 1.243, "step": 2915 }, { "epoch": 0.61, "learning_rate": 1.373677779154952e-05, "loss": 1.2675, "step": 2916 }, { "epoch": 0.61, "learning_rate": 1.3723838146320128e-05, "loss": 1.2731, "step": 2917 }, { "epoch": 0.61, "learning_rate": 1.3710901414514235e-05, "loss": 1.2577, "step": 2918 }, { "epoch": 0.61, "learning_rate": 1.3697967602137135e-05, "loss": 1.3023, "step": 2919 }, { "epoch": 0.61, "learning_rate": 1.368503671519276e-05, "loss": 1.2115, "step": 2920 }, { "epoch": 0.61, "learning_rate": 1.3672108759683694e-05, "loss": 1.2979, "step": 2921 }, { "epoch": 0.61, "learning_rate": 1.3659183741611154e-05, "loss": 1.2026, "step": 2922 }, { "epoch": 0.61, "learning_rate": 1.3646261666974976e-05, "loss": 1.2819, "step": 2923 }, { "epoch": 0.62, "learning_rate": 1.3633342541773673e-05, "loss": 1.2395, "step": 2924 }, { "epoch": 0.62, "learning_rate": 1.3620426372004353e-05, "loss": 1.2855, "step": 2925 }, { "epoch": 0.62, "learning_rate": 1.360751316366275e-05, "loss": 1.2285, "step": 2926 }, { "epoch": 0.62, "learning_rate": 1.3594602922743252e-05, "loss": 1.2752, "step": 2927 }, { "epoch": 0.62, "learning_rate": 1.358169565523884e-05, "loss": 1.2844, "step": 2928 }, { "epoch": 0.62, "learning_rate": 1.356879136714114e-05, "loss": 1.2648, "step": 2929 }, { "epoch": 0.62, "learning_rate": 1.3555890064440374e-05, "loss": 1.2449, "step": 2930 }, { "epoch": 0.62, "learning_rate": 1.3542991753125387e-05, "loss": 1.2562, "step": 2931 }, { "epoch": 0.62, "learning_rate": 1.3530096439183637e-05, "loss": 1.2648, "step": 2932 }, { "epoch": 0.62, "learning_rate": 1.3517204128601193e-05, "loss": 1.3028, "step": 2933 }, { "epoch": 0.62, "learning_rate": 1.3504314827362715e-05, "loss": 1.2734, "step": 2934 }, { "epoch": 0.62, "learning_rate": 1.3491428541451487e-05, "loss": 1.2486, "step": 2935 }, { "epoch": 0.62, "learning_rate": 1.3478545276849373e-05, "loss": 1.2236, "step": 2936 }, { "epoch": 0.62, "learning_rate": 1.3465665039536857e-05, "loss": 1.2125, "step": 2937 }, { "epoch": 0.62, "learning_rate": 1.3452787835492998e-05, "loss": 1.2841, "step": 2938 }, { "epoch": 0.62, "learning_rate": 1.3439913670695445e-05, "loss": 1.272, "step": 2939 }, { "epoch": 0.62, "learning_rate": 1.3427042551120461e-05, "loss": 1.2461, "step": 2940 }, { "epoch": 0.62, "learning_rate": 1.3414174482742865e-05, "loss": 1.2438, "step": 2941 }, { "epoch": 0.62, "learning_rate": 1.3401309471536092e-05, "loss": 1.3218, "step": 2942 }, { "epoch": 0.62, "learning_rate": 1.3388447523472122e-05, "loss": 1.2427, "step": 2943 }, { "epoch": 0.62, "learning_rate": 1.337558864452154e-05, "loss": 1.2186, "step": 2944 }, { "epoch": 0.62, "learning_rate": 1.3362732840653494e-05, "loss": 1.281, "step": 2945 }, { "epoch": 0.62, "learning_rate": 1.3349880117835716e-05, "loss": 1.2928, "step": 2946 }, { "epoch": 0.62, "learning_rate": 1.3337030482034485e-05, "loss": 1.238, "step": 2947 }, { "epoch": 0.62, "learning_rate": 1.332418393921468e-05, "loss": 1.2709, "step": 2948 }, { "epoch": 0.62, "learning_rate": 1.3311340495339704e-05, "loss": 1.2721, "step": 2949 }, { "epoch": 0.62, "learning_rate": 1.3298500156371565e-05, "loss": 1.2556, "step": 2950 }, { "epoch": 0.62, "learning_rate": 1.32856629282708e-05, "loss": 1.2359, "step": 2951 }, { "epoch": 0.62, "learning_rate": 1.3272828816996498e-05, "loss": 1.1736, "step": 2952 }, { "epoch": 0.62, "learning_rate": 1.3259997828506333e-05, "loss": 1.236, "step": 2953 }, { "epoch": 0.62, "learning_rate": 1.3247169968756494e-05, "loss": 1.2504, "step": 2954 }, { "epoch": 0.62, "learning_rate": 1.3234345243701743e-05, "loss": 1.2845, "step": 2955 }, { "epoch": 0.62, "learning_rate": 1.322152365929537e-05, "loss": 1.3128, "step": 2956 }, { "epoch": 0.62, "learning_rate": 1.320870522148921e-05, "loss": 1.2583, "step": 2957 }, { "epoch": 0.62, "learning_rate": 1.3195889936233662e-05, "loss": 1.2285, "step": 2958 }, { "epoch": 0.62, "learning_rate": 1.3183077809477625e-05, "loss": 1.244, "step": 2959 }, { "epoch": 0.62, "learning_rate": 1.3170268847168541e-05, "loss": 1.2554, "step": 2960 }, { "epoch": 0.62, "learning_rate": 1.315746305525241e-05, "loss": 1.2997, "step": 2961 }, { "epoch": 0.62, "learning_rate": 1.3144660439673727e-05, "loss": 1.2706, "step": 2962 }, { "epoch": 0.62, "learning_rate": 1.3131861006375519e-05, "loss": 1.2711, "step": 2963 }, { "epoch": 0.62, "learning_rate": 1.311906476129936e-05, "loss": 1.2799, "step": 2964 }, { "epoch": 0.62, "learning_rate": 1.3106271710385312e-05, "loss": 1.2707, "step": 2965 }, { "epoch": 0.62, "learning_rate": 1.3093481859571981e-05, "loss": 1.2544, "step": 2966 }, { "epoch": 0.62, "learning_rate": 1.3080695214796464e-05, "loss": 1.3039, "step": 2967 }, { "epoch": 0.62, "learning_rate": 1.3067911781994384e-05, "loss": 1.268, "step": 2968 }, { "epoch": 0.62, "learning_rate": 1.3055131567099872e-05, "loss": 1.2769, "step": 2969 }, { "epoch": 0.62, "learning_rate": 1.3042354576045559e-05, "loss": 1.2833, "step": 2970 }, { "epoch": 0.62, "learning_rate": 1.3029580814762596e-05, "loss": 1.3312, "step": 2971 }, { "epoch": 0.63, "learning_rate": 1.3016810289180615e-05, "loss": 1.2674, "step": 2972 }, { "epoch": 0.63, "learning_rate": 1.3004043005227742e-05, "loss": 1.2727, "step": 2973 }, { "epoch": 0.63, "learning_rate": 1.2991278968830625e-05, "loss": 1.2411, "step": 2974 }, { "epoch": 0.63, "learning_rate": 1.2978518185914388e-05, "loss": 1.2056, "step": 2975 }, { "epoch": 0.63, "learning_rate": 1.296576066240263e-05, "loss": 1.2812, "step": 2976 }, { "epoch": 0.63, "learning_rate": 1.2953006404217474e-05, "loss": 1.2758, "step": 2977 }, { "epoch": 0.63, "learning_rate": 1.2940255417279486e-05, "loss": 1.2314, "step": 2978 }, { "epoch": 0.63, "learning_rate": 1.2927507707507751e-05, "loss": 1.2708, "step": 2979 }, { "epoch": 0.63, "learning_rate": 1.2914763280819804e-05, "loss": 1.2673, "step": 2980 }, { "epoch": 0.63, "learning_rate": 1.2902022143131668e-05, "loss": 1.2948, "step": 2981 }, { "epoch": 0.63, "learning_rate": 1.2889284300357847e-05, "loss": 1.19, "step": 2982 }, { "epoch": 0.63, "learning_rate": 1.28765497584113e-05, "loss": 1.2836, "step": 2983 }, { "epoch": 0.63, "learning_rate": 1.2863818523203452e-05, "loss": 1.2801, "step": 2984 }, { "epoch": 0.63, "learning_rate": 1.2851090600644223e-05, "loss": 1.2242, "step": 2985 }, { "epoch": 0.63, "learning_rate": 1.2838365996641949e-05, "loss": 1.214, "step": 2986 }, { "epoch": 0.63, "learning_rate": 1.2825644717103472e-05, "loss": 1.2517, "step": 2987 }, { "epoch": 0.63, "learning_rate": 1.281292676793406e-05, "loss": 1.2531, "step": 2988 }, { "epoch": 0.63, "learning_rate": 1.2800212155037437e-05, "loss": 1.2424, "step": 2989 }, { "epoch": 0.63, "learning_rate": 1.27875008843158e-05, "loss": 1.2163, "step": 2990 }, { "epoch": 0.63, "learning_rate": 1.2774792961669764e-05, "loss": 1.2368, "step": 2991 }, { "epoch": 0.63, "learning_rate": 1.2762088392998417e-05, "loss": 1.2562, "step": 2992 }, { "epoch": 0.63, "learning_rate": 1.2749387184199283e-05, "loss": 1.2586, "step": 2993 }, { "epoch": 0.63, "learning_rate": 1.2736689341168304e-05, "loss": 1.2376, "step": 2994 }, { "epoch": 0.63, "learning_rate": 1.2723994869799898e-05, "loss": 1.276, "step": 2995 }, { "epoch": 0.63, "learning_rate": 1.2711303775986888e-05, "loss": 1.2508, "step": 2996 }, { "epoch": 0.63, "learning_rate": 1.2698616065620528e-05, "loss": 1.2692, "step": 2997 }, { "epoch": 0.63, "learning_rate": 1.2685931744590536e-05, "loss": 1.2778, "step": 2998 }, { "epoch": 0.63, "learning_rate": 1.267325081878501e-05, "loss": 1.2686, "step": 2999 }, { "epoch": 0.63, "learning_rate": 1.2660573294090512e-05, "loss": 1.1985, "step": 3000 }, { "epoch": 0.63, "learning_rate": 1.2647899176392003e-05, "loss": 1.2766, "step": 3001 }, { "epoch": 0.63, "learning_rate": 1.2635228471572853e-05, "loss": 1.2287, "step": 3002 }, { "epoch": 0.63, "learning_rate": 1.2622561185514886e-05, "loss": 1.2832, "step": 3003 }, { "epoch": 0.63, "learning_rate": 1.2609897324098297e-05, "loss": 1.2815, "step": 3004 }, { "epoch": 0.63, "learning_rate": 1.2597236893201712e-05, "loss": 1.2282, "step": 3005 }, { "epoch": 0.63, "learning_rate": 1.2584579898702175e-05, "loss": 1.2733, "step": 3006 }, { "epoch": 0.63, "learning_rate": 1.25719263464751e-05, "loss": 1.2478, "step": 3007 }, { "epoch": 0.63, "learning_rate": 1.2559276242394347e-05, "loss": 1.2505, "step": 3008 }, { "epoch": 0.63, "learning_rate": 1.254662959233214e-05, "loss": 1.2536, "step": 3009 }, { "epoch": 0.63, "learning_rate": 1.2533986402159113e-05, "loss": 1.226, "step": 3010 }, { "epoch": 0.63, "learning_rate": 1.25213466777443e-05, "loss": 1.2879, "step": 3011 }, { "epoch": 0.63, "learning_rate": 1.2508710424955117e-05, "loss": 1.2379, "step": 3012 }, { "epoch": 0.63, "learning_rate": 1.249607764965736e-05, "loss": 1.2265, "step": 3013 }, { "epoch": 0.63, "learning_rate": 1.2483448357715242e-05, "loss": 1.2394, "step": 3014 }, { "epoch": 0.63, "learning_rate": 1.2470822554991321e-05, "loss": 1.2516, "step": 3015 }, { "epoch": 0.63, "learning_rate": 1.2458200247346569e-05, "loss": 1.2821, "step": 3016 }, { "epoch": 0.63, "learning_rate": 1.2445581440640312e-05, "loss": 1.2852, "step": 3017 }, { "epoch": 0.63, "learning_rate": 1.243296614073025e-05, "loss": 1.1998, "step": 3018 }, { "epoch": 0.64, "learning_rate": 1.2420354353472483e-05, "loss": 1.25, "step": 3019 }, { "epoch": 0.64, "learning_rate": 1.2407746084721444e-05, "loss": 1.2859, "step": 3020 }, { "epoch": 0.64, "learning_rate": 1.2395141340329966e-05, "loss": 1.2565, "step": 3021 }, { "epoch": 0.64, "learning_rate": 1.2382540126149218e-05, "loss": 1.2491, "step": 3022 }, { "epoch": 0.64, "learning_rate": 1.2369942448028738e-05, "loss": 1.2762, "step": 3023 }, { "epoch": 0.64, "learning_rate": 1.2357348311816444e-05, "loss": 1.2898, "step": 3024 }, { "epoch": 0.64, "learning_rate": 1.2344757723358583e-05, "loss": 1.2155, "step": 3025 }, { "epoch": 0.64, "learning_rate": 1.2332170688499753e-05, "loss": 1.2094, "step": 3026 }, { "epoch": 0.64, "learning_rate": 1.2319587213082931e-05, "loss": 1.2514, "step": 3027 }, { "epoch": 0.64, "learning_rate": 1.230700730294942e-05, "loss": 1.2304, "step": 3028 }, { "epoch": 0.64, "learning_rate": 1.229443096393887e-05, "loss": 1.2681, "step": 3029 }, { "epoch": 0.64, "learning_rate": 1.2281858201889283e-05, "loss": 1.2783, "step": 3030 }, { "epoch": 0.64, "learning_rate": 1.226928902263698e-05, "loss": 1.2335, "step": 3031 }, { "epoch": 0.64, "learning_rate": 1.2256723432016648e-05, "loss": 1.2521, "step": 3032 }, { "epoch": 0.64, "learning_rate": 1.2244161435861286e-05, "loss": 1.2283, "step": 3033 }, { "epoch": 0.64, "learning_rate": 1.2231603040002225e-05, "loss": 1.2738, "step": 3034 }, { "epoch": 0.64, "learning_rate": 1.2219048250269141e-05, "loss": 1.2477, "step": 3035 }, { "epoch": 0.64, "learning_rate": 1.2206497072490014e-05, "loss": 1.2762, "step": 3036 }, { "epoch": 0.64, "learning_rate": 1.2193949512491172e-05, "loss": 1.1641, "step": 3037 }, { "epoch": 0.64, "learning_rate": 1.2181405576097247e-05, "loss": 1.2404, "step": 3038 }, { "epoch": 0.64, "learning_rate": 1.2168865269131182e-05, "loss": 1.2503, "step": 3039 }, { "epoch": 0.64, "learning_rate": 1.2156328597414258e-05, "loss": 1.2949, "step": 3040 }, { "epoch": 0.64, "learning_rate": 1.2143795566766054e-05, "loss": 1.2353, "step": 3041 }, { "epoch": 0.64, "learning_rate": 1.2131266183004455e-05, "loss": 1.2002, "step": 3042 }, { "epoch": 0.64, "learning_rate": 1.2118740451945668e-05, "loss": 1.3042, "step": 3043 }, { "epoch": 0.64, "learning_rate": 1.2106218379404187e-05, "loss": 1.2446, "step": 3044 }, { "epoch": 0.64, "learning_rate": 1.2093699971192825e-05, "loss": 1.2387, "step": 3045 }, { "epoch": 0.64, "learning_rate": 1.208118523312268e-05, "loss": 1.2728, "step": 3046 }, { "epoch": 0.64, "learning_rate": 1.2068674171003146e-05, "loss": 1.2408, "step": 3047 }, { "epoch": 0.64, "learning_rate": 1.205616679064193e-05, "loss": 1.2971, "step": 3048 }, { "epoch": 0.64, "learning_rate": 1.2043663097844999e-05, "loss": 1.225, "step": 3049 }, { "epoch": 0.64, "learning_rate": 1.2031163098416644e-05, "loss": 1.2419, "step": 3050 }, { "epoch": 0.64, "learning_rate": 1.2018666798159408e-05, "loss": 1.2404, "step": 3051 }, { "epoch": 0.64, "learning_rate": 1.2006174202874141e-05, "loss": 1.2231, "step": 3052 }, { "epoch": 0.64, "learning_rate": 1.1993685318359956e-05, "loss": 1.2834, "step": 3053 }, { "epoch": 0.64, "learning_rate": 1.1981200150414262e-05, "loss": 1.2667, "step": 3054 }, { "epoch": 0.64, "learning_rate": 1.1968718704832716e-05, "loss": 1.2504, "step": 3055 }, { "epoch": 0.64, "learning_rate": 1.195624098740928e-05, "loss": 1.3046, "step": 3056 }, { "epoch": 0.64, "learning_rate": 1.1943767003936152e-05, "loss": 1.3131, "step": 3057 }, { "epoch": 0.64, "learning_rate": 1.1931296760203831e-05, "loss": 1.1847, "step": 3058 }, { "epoch": 0.64, "learning_rate": 1.1918830262001055e-05, "loss": 1.2474, "step": 3059 }, { "epoch": 0.64, "learning_rate": 1.1906367515114816e-05, "loss": 1.2154, "step": 3060 }, { "epoch": 0.64, "learning_rate": 1.1893908525330401e-05, "loss": 1.2043, "step": 3061 }, { "epoch": 0.64, "learning_rate": 1.1881453298431323e-05, "loss": 1.2553, "step": 3062 }, { "epoch": 0.64, "learning_rate": 1.1869001840199345e-05, "loss": 1.2315, "step": 3063 }, { "epoch": 0.64, "learning_rate": 1.1856554156414503e-05, "loss": 1.2944, "step": 3064 }, { "epoch": 0.64, "learning_rate": 1.1844110252855072e-05, "loss": 1.2535, "step": 3065 }, { "epoch": 0.64, "learning_rate": 1.1831670135297564e-05, "loss": 1.2273, "step": 3066 }, { "epoch": 0.65, "learning_rate": 1.1819233809516746e-05, "loss": 1.2651, "step": 3067 }, { "epoch": 0.65, "learning_rate": 1.1806801281285604e-05, "loss": 1.2971, "step": 3068 }, { "epoch": 0.65, "learning_rate": 1.1794372556375392e-05, "loss": 1.1915, "step": 3069 }, { "epoch": 0.65, "learning_rate": 1.178194764055557e-05, "loss": 1.2387, "step": 3070 }, { "epoch": 0.65, "learning_rate": 1.176952653959385e-05, "loss": 1.2562, "step": 3071 }, { "epoch": 0.65, "learning_rate": 1.1757109259256163e-05, "loss": 1.2709, "step": 3072 }, { "epoch": 0.65, "learning_rate": 1.174469580530666e-05, "loss": 1.2709, "step": 3073 }, { "epoch": 0.65, "learning_rate": 1.1732286183507738e-05, "loss": 1.2748, "step": 3074 }, { "epoch": 0.65, "learning_rate": 1.1719880399619987e-05, "loss": 1.2853, "step": 3075 }, { "epoch": 0.65, "learning_rate": 1.1707478459402236e-05, "loss": 1.2439, "step": 3076 }, { "epoch": 0.65, "learning_rate": 1.1695080368611526e-05, "loss": 1.2483, "step": 3077 }, { "epoch": 0.65, "learning_rate": 1.1682686133003105e-05, "loss": 1.2746, "step": 3078 }, { "epoch": 0.65, "learning_rate": 1.167029575833044e-05, "loss": 1.2671, "step": 3079 }, { "epoch": 0.65, "learning_rate": 1.1657909250345194e-05, "loss": 1.262, "step": 3080 }, { "epoch": 0.65, "learning_rate": 1.1645526614797235e-05, "loss": 1.233, "step": 3081 }, { "epoch": 0.65, "learning_rate": 1.1633147857434658e-05, "loss": 1.2542, "step": 3082 }, { "epoch": 0.65, "learning_rate": 1.1620772984003724e-05, "loss": 1.1922, "step": 3083 }, { "epoch": 0.65, "learning_rate": 1.1608402000248908e-05, "loss": 1.2216, "step": 3084 }, { "epoch": 0.65, "learning_rate": 1.1596034911912896e-05, "loss": 1.2636, "step": 3085 }, { "epoch": 0.65, "learning_rate": 1.1583671724736526e-05, "loss": 1.2652, "step": 3086 }, { "epoch": 0.65, "learning_rate": 1.157131244445886e-05, "loss": 1.2796, "step": 3087 }, { "epoch": 0.65, "learning_rate": 1.1558957076817135e-05, "loss": 1.2416, "step": 3088 }, { "epoch": 0.65, "learning_rate": 1.1546605627546752e-05, "loss": 1.2384, "step": 3089 }, { "epoch": 0.65, "learning_rate": 1.1534258102381332e-05, "loss": 1.2593, "step": 3090 }, { "epoch": 0.65, "learning_rate": 1.1521914507052646e-05, "loss": 1.2582, "step": 3091 }, { "epoch": 0.65, "learning_rate": 1.1509574847290641e-05, "loss": 1.2765, "step": 3092 }, { "epoch": 0.65, "learning_rate": 1.1497239128823456e-05, "loss": 1.2534, "step": 3093 }, { "epoch": 0.65, "learning_rate": 1.1484907357377378e-05, "loss": 1.2489, "step": 3094 }, { "epoch": 0.65, "learning_rate": 1.1472579538676883e-05, "loss": 1.2354, "step": 3095 }, { "epoch": 0.65, "learning_rate": 1.1460255678444598e-05, "loss": 1.2463, "step": 3096 }, { "epoch": 0.65, "learning_rate": 1.144793578240131e-05, "loss": 1.2898, "step": 3097 }, { "epoch": 0.65, "learning_rate": 1.1435619856265982e-05, "loss": 1.2528, "step": 3098 }, { "epoch": 0.65, "learning_rate": 1.1423307905755714e-05, "loss": 1.2369, "step": 3099 }, { "epoch": 0.65, "learning_rate": 1.1410999936585782e-05, "loss": 1.244, "step": 3100 }, { "epoch": 0.65, "learning_rate": 1.1398695954469598e-05, "loss": 1.2379, "step": 3101 }, { "epoch": 0.65, "learning_rate": 1.1386395965118715e-05, "loss": 1.2566, "step": 3102 }, { "epoch": 0.65, "learning_rate": 1.1374099974242867e-05, "loss": 1.208, "step": 3103 }, { "epoch": 0.65, "learning_rate": 1.13618079875499e-05, "loss": 1.2772, "step": 3104 }, { "epoch": 0.65, "learning_rate": 1.1349520010745802e-05, "loss": 1.2572, "step": 3105 }, { "epoch": 0.65, "learning_rate": 1.1337236049534726e-05, "loss": 1.2293, "step": 3106 }, { "epoch": 0.65, "learning_rate": 1.1324956109618927e-05, "loss": 1.2289, "step": 3107 }, { "epoch": 0.65, "learning_rate": 1.1312680196698817e-05, "loss": 1.2304, "step": 3108 }, { "epoch": 0.65, "learning_rate": 1.1300408316472944e-05, "loss": 1.2079, "step": 3109 }, { "epoch": 0.65, "learning_rate": 1.1288140474637953e-05, "loss": 1.2966, "step": 3110 }, { "epoch": 0.65, "learning_rate": 1.127587667688865e-05, "loss": 1.2413, "step": 3111 }, { "epoch": 0.65, "learning_rate": 1.126361692891794e-05, "loss": 1.2613, "step": 3112 }, { "epoch": 0.65, "learning_rate": 1.1251361236416845e-05, "loss": 1.2146, "step": 3113 }, { "epoch": 0.65, "learning_rate": 1.1239109605074527e-05, "loss": 1.2915, "step": 3114 }, { "epoch": 0.66, "learning_rate": 1.1226862040578244e-05, "loss": 1.2645, "step": 3115 }, { "epoch": 0.66, "learning_rate": 1.1214618548613379e-05, "loss": 1.2736, "step": 3116 }, { "epoch": 0.66, "learning_rate": 1.1202379134863412e-05, "loss": 1.2165, "step": 3117 }, { "epoch": 0.66, "learning_rate": 1.1190143805009934e-05, "loss": 1.2914, "step": 3118 }, { "epoch": 0.66, "learning_rate": 1.117791256473265e-05, "loss": 1.2711, "step": 3119 }, { "epoch": 0.66, "learning_rate": 1.1165685419709353e-05, "loss": 1.2913, "step": 3120 }, { "epoch": 0.66, "learning_rate": 1.1153462375615934e-05, "loss": 1.2376, "step": 3121 }, { "epoch": 0.66, "learning_rate": 1.1141243438126403e-05, "loss": 1.2159, "step": 3122 }, { "epoch": 0.66, "learning_rate": 1.1129028612912832e-05, "loss": 1.2791, "step": 3123 }, { "epoch": 0.66, "learning_rate": 1.1116817905645411e-05, "loss": 1.2404, "step": 3124 }, { "epoch": 0.66, "learning_rate": 1.1104611321992404e-05, "loss": 1.1767, "step": 3125 }, { "epoch": 0.66, "learning_rate": 1.1092408867620155e-05, "loss": 1.2579, "step": 3126 }, { "epoch": 0.66, "learning_rate": 1.1080210548193113e-05, "loss": 1.2654, "step": 3127 }, { "epoch": 0.66, "learning_rate": 1.1068016369373784e-05, "loss": 1.2605, "step": 3128 }, { "epoch": 0.66, "learning_rate": 1.1055826336822775e-05, "loss": 1.2506, "step": 3129 }, { "epoch": 0.66, "learning_rate": 1.1043640456198745e-05, "loss": 1.2704, "step": 3130 }, { "epoch": 0.66, "learning_rate": 1.1031458733158434e-05, "loss": 1.2567, "step": 3131 }, { "epoch": 0.66, "learning_rate": 1.101928117335666e-05, "loss": 1.2427, "step": 3132 }, { "epoch": 0.66, "learning_rate": 1.100710778244631e-05, "loss": 1.3009, "step": 3133 }, { "epoch": 0.66, "learning_rate": 1.0994938566078315e-05, "loss": 1.333, "step": 3134 }, { "epoch": 0.66, "learning_rate": 1.0982773529901696e-05, "loss": 1.2514, "step": 3135 }, { "epoch": 0.66, "learning_rate": 1.0970612679563501e-05, "loss": 1.2851, "step": 3136 }, { "epoch": 0.66, "learning_rate": 1.0958456020708875e-05, "loss": 1.2252, "step": 3137 }, { "epoch": 0.66, "learning_rate": 1.0946303558980981e-05, "loss": 1.2286, "step": 3138 }, { "epoch": 0.66, "learning_rate": 1.0934155300021048e-05, "loss": 1.2222, "step": 3139 }, { "epoch": 0.66, "learning_rate": 1.0922011249468362e-05, "loss": 1.276, "step": 3140 }, { "epoch": 0.66, "learning_rate": 1.0909871412960245e-05, "loss": 1.2708, "step": 3141 }, { "epoch": 0.66, "learning_rate": 1.0897735796132056e-05, "loss": 1.2226, "step": 3142 }, { "epoch": 0.66, "learning_rate": 1.0885604404617221e-05, "loss": 1.2381, "step": 3143 }, { "epoch": 0.66, "learning_rate": 1.087347724404717e-05, "loss": 1.2114, "step": 3144 }, { "epoch": 0.66, "learning_rate": 1.086135432005141e-05, "loss": 1.2417, "step": 3145 }, { "epoch": 0.66, "learning_rate": 1.0849235638257442e-05, "loss": 1.2634, "step": 3146 }, { "epoch": 0.66, "learning_rate": 1.0837121204290812e-05, "loss": 1.2382, "step": 3147 }, { "epoch": 0.66, "learning_rate": 1.0825011023775111e-05, "loss": 1.2725, "step": 3148 }, { "epoch": 0.66, "learning_rate": 1.0812905102331927e-05, "loss": 1.2429, "step": 3149 }, { "epoch": 0.66, "learning_rate": 1.0800803445580896e-05, "loss": 1.2722, "step": 3150 }, { "epoch": 0.66, "learning_rate": 1.078870605913966e-05, "loss": 1.2428, "step": 3151 }, { "epoch": 0.66, "learning_rate": 1.0776612948623874e-05, "loss": 1.2101, "step": 3152 }, { "epoch": 0.66, "learning_rate": 1.0764524119647228e-05, "loss": 1.2547, "step": 3153 }, { "epoch": 0.66, "learning_rate": 1.0752439577821398e-05, "loss": 1.2091, "step": 3154 }, { "epoch": 0.66, "learning_rate": 1.0740359328756105e-05, "loss": 1.2598, "step": 3155 }, { "epoch": 0.66, "learning_rate": 1.0728283378059036e-05, "loss": 1.2172, "step": 3156 }, { "epoch": 0.66, "learning_rate": 1.0716211731335922e-05, "loss": 1.2211, "step": 3157 }, { "epoch": 0.66, "learning_rate": 1.0704144394190458e-05, "loss": 1.2192, "step": 3158 }, { "epoch": 0.66, "learning_rate": 1.0692081372224378e-05, "loss": 1.2634, "step": 3159 }, { "epoch": 0.66, "learning_rate": 1.0680022671037376e-05, "loss": 1.2721, "step": 3160 }, { "epoch": 0.66, "learning_rate": 1.0667968296227169e-05, "loss": 1.2578, "step": 3161 }, { "epoch": 0.67, "learning_rate": 1.0655918253389452e-05, "loss": 1.2648, "step": 3162 }, { "epoch": 0.67, "learning_rate": 1.06438725481179e-05, "loss": 1.2056, "step": 3163 }, { "epoch": 0.67, "learning_rate": 1.06318311860042e-05, "loss": 1.2825, "step": 3164 }, { "epoch": 0.67, "learning_rate": 1.0619794172637995e-05, "loss": 1.2496, "step": 3165 }, { "epoch": 0.67, "learning_rate": 1.0607761513606935e-05, "loss": 1.2619, "step": 3166 }, { "epoch": 0.67, "learning_rate": 1.0595733214496633e-05, "loss": 1.251, "step": 3167 }, { "epoch": 0.67, "learning_rate": 1.0583709280890668e-05, "loss": 1.2337, "step": 3168 }, { "epoch": 0.67, "learning_rate": 1.0571689718370629e-05, "loss": 1.2735, "step": 3169 }, { "epoch": 0.67, "learning_rate": 1.0559674532516033e-05, "loss": 1.2749, "step": 3170 }, { "epoch": 0.67, "learning_rate": 1.0547663728904392e-05, "loss": 1.2735, "step": 3171 }, { "epoch": 0.67, "learning_rate": 1.0535657313111183e-05, "loss": 1.2379, "step": 3172 }, { "epoch": 0.67, "learning_rate": 1.0523655290709825e-05, "loss": 1.2867, "step": 3173 }, { "epoch": 0.67, "learning_rate": 1.0511657667271731e-05, "loss": 1.2593, "step": 3174 }, { "epoch": 0.67, "learning_rate": 1.0499664448366245e-05, "loss": 1.246, "step": 3175 }, { "epoch": 0.67, "learning_rate": 1.0487675639560664e-05, "loss": 1.3067, "step": 3176 }, { "epoch": 0.67, "learning_rate": 1.0475691246420267e-05, "loss": 1.2627, "step": 3177 }, { "epoch": 0.67, "learning_rate": 1.0463711274508253e-05, "loss": 1.2061, "step": 3178 }, { "epoch": 0.67, "learning_rate": 1.045173572938579e-05, "loss": 1.2407, "step": 3179 }, { "epoch": 0.67, "learning_rate": 1.0439764616611972e-05, "loss": 1.2155, "step": 3180 }, { "epoch": 0.67, "learning_rate": 1.0427797941743854e-05, "loss": 1.2505, "step": 3181 }, { "epoch": 0.67, "learning_rate": 1.041583571033641e-05, "loss": 1.2293, "step": 3182 }, { "epoch": 0.67, "learning_rate": 1.0403877927942582e-05, "loss": 1.2287, "step": 3183 }, { "epoch": 0.67, "learning_rate": 1.0391924600113211e-05, "loss": 1.1778, "step": 3184 }, { "epoch": 0.67, "learning_rate": 1.0379975732397096e-05, "loss": 1.2483, "step": 3185 }, { "epoch": 0.67, "learning_rate": 1.0368031330340948e-05, "loss": 1.1827, "step": 3186 }, { "epoch": 0.67, "learning_rate": 1.0356091399489431e-05, "loss": 1.2479, "step": 3187 }, { "epoch": 0.67, "learning_rate": 1.0344155945385106e-05, "loss": 1.2303, "step": 3188 }, { "epoch": 0.67, "learning_rate": 1.0332224973568458e-05, "loss": 1.1771, "step": 3189 }, { "epoch": 0.67, "learning_rate": 1.0320298489577913e-05, "loss": 1.2381, "step": 3190 }, { "epoch": 0.67, "learning_rate": 1.03083764989498e-05, "loss": 1.2529, "step": 3191 }, { "epoch": 0.67, "learning_rate": 1.0296459007218345e-05, "loss": 1.2194, "step": 3192 }, { "epoch": 0.67, "learning_rate": 1.0284546019915727e-05, "loss": 1.2178, "step": 3193 }, { "epoch": 0.67, "learning_rate": 1.0272637542571988e-05, "loss": 1.228, "step": 3194 }, { "epoch": 0.67, "learning_rate": 1.026073358071512e-05, "loss": 1.2682, "step": 3195 }, { "epoch": 0.67, "learning_rate": 1.0248834139870985e-05, "loss": 1.2121, "step": 3196 }, { "epoch": 0.67, "learning_rate": 1.0236939225563351e-05, "loss": 1.2508, "step": 3197 }, { "epoch": 0.67, "learning_rate": 1.0225048843313914e-05, "loss": 1.216, "step": 3198 }, { "epoch": 0.67, "learning_rate": 1.021316299864223e-05, "loss": 1.1624, "step": 3199 }, { "epoch": 0.67, "learning_rate": 1.0201281697065757e-05, "loss": 1.2211, "step": 3200 }, { "epoch": 0.67, "learning_rate": 1.0189404944099867e-05, "loss": 1.2744, "step": 3201 }, { "epoch": 0.67, "learning_rate": 1.017753274525779e-05, "loss": 1.215, "step": 3202 }, { "epoch": 0.67, "learning_rate": 1.016566510605067e-05, "loss": 1.217, "step": 3203 }, { "epoch": 0.67, "learning_rate": 1.0153802031987504e-05, "loss": 1.25, "step": 3204 }, { "epoch": 0.67, "learning_rate": 1.0141943528575205e-05, "loss": 1.2136, "step": 3205 }, { "epoch": 0.67, "learning_rate": 1.0130089601318525e-05, "loss": 1.2514, "step": 3206 }, { "epoch": 0.67, "learning_rate": 1.0118240255720128e-05, "loss": 1.2354, "step": 3207 }, { "epoch": 0.67, "learning_rate": 1.0106395497280524e-05, "loss": 1.2434, "step": 3208 }, { "epoch": 0.67, "learning_rate": 1.0094555331498118e-05, "loss": 1.2592, "step": 3209 }, { "epoch": 0.68, "learning_rate": 1.0082719763869153e-05, "loss": 1.1914, "step": 3210 }, { "epoch": 0.68, "learning_rate": 1.0070888799887772e-05, "loss": 1.2514, "step": 3211 }, { "epoch": 0.68, "learning_rate": 1.0059062445045957e-05, "loss": 1.2839, "step": 3212 }, { "epoch": 0.68, "learning_rate": 1.0047240704833544e-05, "loss": 1.2125, "step": 3213 }, { "epoch": 0.68, "learning_rate": 1.0035423584738262e-05, "loss": 1.2671, "step": 3214 }, { "epoch": 0.68, "learning_rate": 1.0023611090245653e-05, "loss": 1.2732, "step": 3215 }, { "epoch": 0.68, "learning_rate": 1.0011803226839148e-05, "loss": 1.2836, "step": 3216 }, { "epoch": 0.68, "learning_rate": 1.0000000000000006e-05, "loss": 1.2816, "step": 3217 }, { "epoch": 0.68, "learning_rate": 9.988201415207327e-06, "loss": 1.2156, "step": 3218 }, { "epoch": 0.68, "learning_rate": 9.976407477938092e-06, "loss": 1.2123, "step": 3219 }, { "epoch": 0.68, "learning_rate": 9.96461819366709e-06, "loss": 1.2187, "step": 3220 }, { "epoch": 0.68, "learning_rate": 9.952833567866954e-06, "loss": 1.257, "step": 3221 }, { "epoch": 0.68, "learning_rate": 9.941053606008176e-06, "loss": 1.2064, "step": 3222 }, { "epoch": 0.68, "learning_rate": 9.929278313559054e-06, "loss": 1.2562, "step": 3223 }, { "epoch": 0.68, "learning_rate": 9.917507695985752e-06, "loss": 1.2479, "step": 3224 }, { "epoch": 0.68, "learning_rate": 9.905741758752234e-06, "loss": 1.2899, "step": 3225 }, { "epoch": 0.68, "learning_rate": 9.893980507320295e-06, "loss": 1.2659, "step": 3226 }, { "epoch": 0.68, "learning_rate": 9.882223947149583e-06, "loss": 1.2383, "step": 3227 }, { "epoch": 0.68, "learning_rate": 9.870472083697526e-06, "loss": 1.2468, "step": 3228 }, { "epoch": 0.68, "learning_rate": 9.858724922419413e-06, "loss": 1.2514, "step": 3229 }, { "epoch": 0.68, "learning_rate": 9.846982468768316e-06, "loss": 1.2496, "step": 3230 }, { "epoch": 0.68, "learning_rate": 9.83524472819515e-06, "loss": 1.2408, "step": 3231 }, { "epoch": 0.68, "learning_rate": 9.823511706148612e-06, "loss": 1.2441, "step": 3232 }, { "epoch": 0.68, "learning_rate": 9.811783408075244e-06, "loss": 1.2472, "step": 3233 }, { "epoch": 0.68, "learning_rate": 9.800059839419358e-06, "loss": 1.2464, "step": 3234 }, { "epoch": 0.68, "learning_rate": 9.788341005623107e-06, "loss": 1.2239, "step": 3235 }, { "epoch": 0.68, "learning_rate": 9.776626912126413e-06, "loss": 1.2192, "step": 3236 }, { "epoch": 0.68, "learning_rate": 9.764917564367025e-06, "loss": 1.2329, "step": 3237 }, { "epoch": 0.68, "learning_rate": 9.753212967780472e-06, "loss": 1.2609, "step": 3238 }, { "epoch": 0.68, "learning_rate": 9.741513127800072e-06, "loss": 1.2723, "step": 3239 }, { "epoch": 0.68, "learning_rate": 9.729818049856963e-06, "loss": 1.2593, "step": 3240 }, { "epoch": 0.68, "learning_rate": 9.718127739380043e-06, "loss": 1.2197, "step": 3241 }, { "epoch": 0.68, "learning_rate": 9.706442201796007e-06, "loss": 1.2573, "step": 3242 }, { "epoch": 0.68, "learning_rate": 9.694761442529345e-06, "loss": 1.196, "step": 3243 }, { "epoch": 0.68, "learning_rate": 9.683085467002306e-06, "loss": 1.2149, "step": 3244 }, { "epoch": 0.68, "learning_rate": 9.67141428063495e-06, "loss": 1.2241, "step": 3245 }, { "epoch": 0.68, "learning_rate": 9.659747888845087e-06, "loss": 1.2784, "step": 3246 }, { "epoch": 0.68, "learning_rate": 9.648086297048302e-06, "loss": 1.2894, "step": 3247 }, { "epoch": 0.68, "learning_rate": 9.636429510657974e-06, "loss": 1.2243, "step": 3248 }, { "epoch": 0.68, "learning_rate": 9.624777535085233e-06, "loss": 1.1729, "step": 3249 }, { "epoch": 0.68, "learning_rate": 9.61313037573897e-06, "loss": 1.2249, "step": 3250 }, { "epoch": 0.68, "learning_rate": 9.601488038025869e-06, "loss": 1.2333, "step": 3251 }, { "epoch": 0.68, "learning_rate": 9.589850527350337e-06, "loss": 1.2303, "step": 3252 }, { "epoch": 0.68, "learning_rate": 9.578217849114579e-06, "loss": 1.2754, "step": 3253 }, { "epoch": 0.68, "learning_rate": 9.566590008718524e-06, "loss": 1.2453, "step": 3254 }, { "epoch": 0.68, "learning_rate": 9.554967011559874e-06, "loss": 1.1803, "step": 3255 }, { "epoch": 0.68, "learning_rate": 9.54334886303409e-06, "loss": 1.2374, "step": 3256 }, { "epoch": 0.69, "learning_rate": 9.53173556853435e-06, "loss": 1.2788, "step": 3257 }, { "epoch": 0.69, "learning_rate": 9.520127133451619e-06, "loss": 1.1753, "step": 3258 }, { "epoch": 0.69, "learning_rate": 9.508523563174578e-06, "loss": 1.2168, "step": 3259 }, { "epoch": 0.69, "learning_rate": 9.496924863089652e-06, "loss": 1.2706, "step": 3260 }, { "epoch": 0.69, "learning_rate": 9.485331038581021e-06, "loss": 1.1991, "step": 3261 }, { "epoch": 0.69, "learning_rate": 9.473742095030588e-06, "loss": 1.2774, "step": 3262 }, { "epoch": 0.69, "learning_rate": 9.46215803781799e-06, "loss": 1.256, "step": 3263 }, { "epoch": 0.69, "learning_rate": 9.450578872320613e-06, "loss": 1.2213, "step": 3264 }, { "epoch": 0.69, "learning_rate": 9.439004603913542e-06, "loss": 1.2804, "step": 3265 }, { "epoch": 0.69, "learning_rate": 9.427435237969624e-06, "loss": 1.2548, "step": 3266 }, { "epoch": 0.69, "learning_rate": 9.415870779859405e-06, "loss": 1.2665, "step": 3267 }, { "epoch": 0.69, "learning_rate": 9.404311234951148e-06, "loss": 1.2561, "step": 3268 }, { "epoch": 0.69, "learning_rate": 9.392756608610871e-06, "loss": 1.239, "step": 3269 }, { "epoch": 0.69, "learning_rate": 9.381206906202268e-06, "loss": 1.2774, "step": 3270 }, { "epoch": 0.69, "learning_rate": 9.369662133086768e-06, "loss": 1.2445, "step": 3271 }, { "epoch": 0.69, "learning_rate": 9.358122294623514e-06, "loss": 1.2767, "step": 3272 }, { "epoch": 0.69, "learning_rate": 9.34658739616934e-06, "loss": 1.2274, "step": 3273 }, { "epoch": 0.69, "learning_rate": 9.335057443078817e-06, "loss": 1.2329, "step": 3274 }, { "epoch": 0.69, "learning_rate": 9.323532440704196e-06, "loss": 1.2633, "step": 3275 }, { "epoch": 0.69, "learning_rate": 9.312012394395423e-06, "loss": 1.2442, "step": 3276 }, { "epoch": 0.69, "learning_rate": 9.300497309500176e-06, "loss": 1.2476, "step": 3277 }, { "epoch": 0.69, "learning_rate": 9.288987191363799e-06, "loss": 1.2524, "step": 3278 }, { "epoch": 0.69, "learning_rate": 9.277482045329344e-06, "loss": 1.2107, "step": 3279 }, { "epoch": 0.69, "learning_rate": 9.265981876737566e-06, "loss": 1.235, "step": 3280 }, { "epoch": 0.69, "learning_rate": 9.254486690926878e-06, "loss": 1.2543, "step": 3281 }, { "epoch": 0.69, "learning_rate": 9.242996493233414e-06, "loss": 1.281, "step": 3282 }, { "epoch": 0.69, "learning_rate": 9.23151128899097e-06, "loss": 1.2388, "step": 3283 }, { "epoch": 0.69, "learning_rate": 9.220031083531026e-06, "loss": 1.2788, "step": 3284 }, { "epoch": 0.69, "learning_rate": 9.208555882182762e-06, "loss": 1.2302, "step": 3285 }, { "epoch": 0.69, "learning_rate": 9.197085690273e-06, "loss": 1.2492, "step": 3286 }, { "epoch": 0.69, "learning_rate": 9.185620513126275e-06, "loss": 1.2273, "step": 3287 }, { "epoch": 0.69, "learning_rate": 9.174160356064765e-06, "loss": 1.2494, "step": 3288 }, { "epoch": 0.69, "learning_rate": 9.162705224408326e-06, "loss": 1.2193, "step": 3289 }, { "epoch": 0.69, "learning_rate": 9.151255123474493e-06, "loss": 1.275, "step": 3290 }, { "epoch": 0.69, "learning_rate": 9.139810058578451e-06, "loss": 1.2566, "step": 3291 }, { "epoch": 0.69, "learning_rate": 9.128370035033046e-06, "loss": 1.2343, "step": 3292 }, { "epoch": 0.69, "learning_rate": 9.116935058148801e-06, "loss": 1.2535, "step": 3293 }, { "epoch": 0.69, "learning_rate": 9.105505133233876e-06, "loss": 1.2429, "step": 3294 }, { "epoch": 0.69, "learning_rate": 9.094080265594108e-06, "loss": 1.2582, "step": 3295 }, { "epoch": 0.69, "learning_rate": 9.082660460532961e-06, "loss": 1.2402, "step": 3296 }, { "epoch": 0.69, "learning_rate": 9.071245723351563e-06, "loss": 1.2902, "step": 3297 }, { "epoch": 0.69, "learning_rate": 9.059836059348696e-06, "loss": 1.2472, "step": 3298 }, { "epoch": 0.69, "learning_rate": 9.048431473820776e-06, "loss": 1.2695, "step": 3299 }, { "epoch": 0.69, "learning_rate": 9.037031972061854e-06, "loss": 1.2196, "step": 3300 }, { "epoch": 0.69, "learning_rate": 9.02563755936365e-06, "loss": 1.2352, "step": 3301 }, { "epoch": 0.69, "learning_rate": 9.014248241015484e-06, "loss": 1.2558, "step": 3302 }, { "epoch": 0.69, "learning_rate": 9.00286402230434e-06, "loss": 1.2743, "step": 3303 }, { "epoch": 0.69, "learning_rate": 8.991484908514835e-06, "loss": 1.2453, "step": 3304 }, { "epoch": 0.7, "learning_rate": 8.980110904929189e-06, "loss": 1.2606, "step": 3305 }, { "epoch": 0.7, "learning_rate": 8.968742016827283e-06, "loss": 1.2222, "step": 3306 }, { "epoch": 0.7, "learning_rate": 8.957378249486592e-06, "loss": 1.2485, "step": 3307 }, { "epoch": 0.7, "learning_rate": 8.946019608182245e-06, "loss": 1.3003, "step": 3308 }, { "epoch": 0.7, "learning_rate": 8.93466609818697e-06, "loss": 1.2299, "step": 3309 }, { "epoch": 0.7, "learning_rate": 8.92331772477111e-06, "loss": 1.2602, "step": 3310 }, { "epoch": 0.7, "learning_rate": 8.91197449320265e-06, "loss": 1.2744, "step": 3311 }, { "epoch": 0.7, "learning_rate": 8.900636408747156e-06, "loss": 1.2881, "step": 3312 }, { "epoch": 0.7, "learning_rate": 8.889303476667823e-06, "loss": 1.2524, "step": 3313 }, { "epoch": 0.7, "learning_rate": 8.877975702225457e-06, "loss": 1.2903, "step": 3314 }, { "epoch": 0.7, "learning_rate": 8.866653090678452e-06, "loss": 1.257, "step": 3315 }, { "epoch": 0.7, "learning_rate": 8.855335647282833e-06, "loss": 1.2713, "step": 3316 }, { "epoch": 0.7, "learning_rate": 8.844023377292198e-06, "loss": 1.2491, "step": 3317 }, { "epoch": 0.7, "learning_rate": 8.832716285957754e-06, "loss": 1.2851, "step": 3318 }, { "epoch": 0.7, "learning_rate": 8.821414378528314e-06, "loss": 1.2564, "step": 3319 }, { "epoch": 0.7, "learning_rate": 8.810117660250275e-06, "loss": 1.2711, "step": 3320 }, { "epoch": 0.7, "learning_rate": 8.798826136367613e-06, "loss": 1.2451, "step": 3321 }, { "epoch": 0.7, "learning_rate": 8.787539812121924e-06, "loss": 1.2593, "step": 3322 }, { "epoch": 0.7, "learning_rate": 8.776258692752355e-06, "loss": 1.2359, "step": 3323 }, { "epoch": 0.7, "learning_rate": 8.76498278349567e-06, "loss": 1.267, "step": 3324 }, { "epoch": 0.7, "learning_rate": 8.753712089586184e-06, "loss": 1.2868, "step": 3325 }, { "epoch": 0.7, "learning_rate": 8.74244661625582e-06, "loss": 1.2327, "step": 3326 }, { "epoch": 0.7, "learning_rate": 8.731186368734049e-06, "loss": 1.2488, "step": 3327 }, { "epoch": 0.7, "learning_rate": 8.719931352247944e-06, "loss": 1.2485, "step": 3328 }, { "epoch": 0.7, "learning_rate": 8.708681572022122e-06, "loss": 1.2541, "step": 3329 }, { "epoch": 0.7, "learning_rate": 8.697437033278797e-06, "loss": 1.2607, "step": 3330 }, { "epoch": 0.7, "learning_rate": 8.686197741237722e-06, "loss": 1.2404, "step": 3331 }, { "epoch": 0.7, "learning_rate": 8.674963701116243e-06, "loss": 1.2154, "step": 3332 }, { "epoch": 0.7, "learning_rate": 8.663734918129247e-06, "loss": 1.2064, "step": 3333 }, { "epoch": 0.7, "learning_rate": 8.652511397489181e-06, "loss": 1.2387, "step": 3334 }, { "epoch": 0.7, "learning_rate": 8.641293144406067e-06, "loss": 1.2578, "step": 3335 }, { "epoch": 0.7, "learning_rate": 8.630080164087456e-06, "loss": 1.2699, "step": 3336 }, { "epoch": 0.7, "learning_rate": 8.618872461738483e-06, "loss": 1.212, "step": 3337 }, { "epoch": 0.7, "learning_rate": 8.607670042561807e-06, "loss": 1.26, "step": 3338 }, { "epoch": 0.7, "learning_rate": 8.596472911757633e-06, "loss": 1.2571, "step": 3339 }, { "epoch": 0.7, "learning_rate": 8.58528107452374e-06, "loss": 1.2404, "step": 3340 }, { "epoch": 0.7, "learning_rate": 8.574094536055423e-06, "loss": 1.261, "step": 3341 }, { "epoch": 0.7, "learning_rate": 8.562913301545513e-06, "loss": 1.2811, "step": 3342 }, { "epoch": 0.7, "learning_rate": 8.55173737618441e-06, "loss": 1.2974, "step": 3343 }, { "epoch": 0.7, "learning_rate": 8.540566765160016e-06, "loss": 1.2205, "step": 3344 }, { "epoch": 0.7, "learning_rate": 8.529401473657795e-06, "loss": 1.2413, "step": 3345 }, { "epoch": 0.7, "learning_rate": 8.518241506860719e-06, "loss": 1.2487, "step": 3346 }, { "epoch": 0.7, "learning_rate": 8.507086869949287e-06, "loss": 1.2469, "step": 3347 }, { "epoch": 0.7, "learning_rate": 8.495937568101551e-06, "loss": 1.2834, "step": 3348 }, { "epoch": 0.7, "learning_rate": 8.484793606493054e-06, "loss": 1.2322, "step": 3349 }, { "epoch": 0.7, "learning_rate": 8.473654990296887e-06, "loss": 1.2866, "step": 3350 }, { "epoch": 0.7, "learning_rate": 8.462521724683637e-06, "loss": 1.202, "step": 3351 }, { "epoch": 0.71, "learning_rate": 8.451393814821427e-06, "loss": 1.2378, "step": 3352 }, { "epoch": 0.71, "learning_rate": 8.440271265875875e-06, "loss": 1.2491, "step": 3353 }, { "epoch": 0.71, "learning_rate": 8.42915408301013e-06, "loss": 1.214, "step": 3354 }, { "epoch": 0.71, "learning_rate": 8.418042271384828e-06, "loss": 1.2479, "step": 3355 }, { "epoch": 0.71, "learning_rate": 8.406935836158138e-06, "loss": 1.209, "step": 3356 }, { "epoch": 0.71, "learning_rate": 8.39583478248571e-06, "loss": 1.235, "step": 3357 }, { "epoch": 0.71, "learning_rate": 8.3847391155207e-06, "loss": 1.27, "step": 3358 }, { "epoch": 0.71, "learning_rate": 8.373648840413781e-06, "loss": 1.2391, "step": 3359 }, { "epoch": 0.71, "learning_rate": 8.362563962313095e-06, "loss": 1.2589, "step": 3360 }, { "epoch": 0.71, "learning_rate": 8.35148448636431e-06, "loss": 1.1859, "step": 3361 }, { "epoch": 0.71, "learning_rate": 8.340410417710562e-06, "loss": 1.2299, "step": 3362 }, { "epoch": 0.71, "learning_rate": 8.32934176149248e-06, "loss": 1.2465, "step": 3363 }, { "epoch": 0.71, "learning_rate": 8.318278522848198e-06, "loss": 1.2249, "step": 3364 }, { "epoch": 0.71, "learning_rate": 8.307220706913308e-06, "loss": 1.2563, "step": 3365 }, { "epoch": 0.71, "learning_rate": 8.296168318820914e-06, "loss": 1.2431, "step": 3366 }, { "epoch": 0.71, "learning_rate": 8.28512136370158e-06, "loss": 1.2983, "step": 3367 }, { "epoch": 0.71, "learning_rate": 8.274079846683346e-06, "loss": 1.257, "step": 3368 }, { "epoch": 0.71, "learning_rate": 8.263043772891752e-06, "loss": 1.2664, "step": 3369 }, { "epoch": 0.71, "learning_rate": 8.252013147449785e-06, "loss": 1.2095, "step": 3370 }, { "epoch": 0.71, "learning_rate": 8.240987975477903e-06, "loss": 1.2258, "step": 3371 }, { "epoch": 0.71, "learning_rate": 8.229968262094064e-06, "loss": 1.2169, "step": 3372 }, { "epoch": 0.71, "learning_rate": 8.218954012413647e-06, "loss": 1.2153, "step": 3373 }, { "epoch": 0.71, "learning_rate": 8.207945231549539e-06, "loss": 1.2195, "step": 3374 }, { "epoch": 0.71, "learning_rate": 8.19694192461205e-06, "loss": 1.2949, "step": 3375 }, { "epoch": 0.71, "learning_rate": 8.185944096708982e-06, "loss": 1.23, "step": 3376 }, { "epoch": 0.71, "learning_rate": 8.17495175294556e-06, "loss": 1.2486, "step": 3377 }, { "epoch": 0.71, "learning_rate": 8.1639648984245e-06, "loss": 1.2274, "step": 3378 }, { "epoch": 0.71, "learning_rate": 8.152983538245933e-06, "loss": 1.2346, "step": 3379 }, { "epoch": 0.71, "learning_rate": 8.142007677507475e-06, "loss": 1.284, "step": 3380 }, { "epoch": 0.71, "learning_rate": 8.131037321304154e-06, "loss": 1.2561, "step": 3381 }, { "epoch": 0.71, "learning_rate": 8.120072474728476e-06, "loss": 1.2514, "step": 3382 }, { "epoch": 0.71, "learning_rate": 8.10911314287037e-06, "loss": 1.2614, "step": 3383 }, { "epoch": 0.71, "learning_rate": 8.098159330817192e-06, "loss": 1.2331, "step": 3384 }, { "epoch": 0.71, "learning_rate": 8.087211043653777e-06, "loss": 1.2503, "step": 3385 }, { "epoch": 0.71, "learning_rate": 8.076268286462352e-06, "loss": 1.2748, "step": 3386 }, { "epoch": 0.71, "learning_rate": 8.065331064322612e-06, "loss": 1.2431, "step": 3387 }, { "epoch": 0.71, "learning_rate": 8.054399382311657e-06, "loss": 1.1891, "step": 3388 }, { "epoch": 0.71, "learning_rate": 8.043473245504017e-06, "loss": 1.2109, "step": 3389 }, { "epoch": 0.71, "learning_rate": 8.032552658971672e-06, "loss": 1.2667, "step": 3390 }, { "epoch": 0.71, "learning_rate": 8.021637627784e-06, "loss": 1.2332, "step": 3391 }, { "epoch": 0.71, "learning_rate": 8.010728157007805e-06, "loss": 1.183, "step": 3392 }, { "epoch": 0.71, "learning_rate": 7.999824251707324e-06, "loss": 1.2525, "step": 3393 }, { "epoch": 0.71, "learning_rate": 7.98892591694419e-06, "loss": 1.256, "step": 3394 }, { "epoch": 0.71, "learning_rate": 7.978033157777473e-06, "loss": 1.2587, "step": 3395 }, { "epoch": 0.71, "learning_rate": 7.967145979263637e-06, "loss": 1.2686, "step": 3396 }, { "epoch": 0.71, "learning_rate": 7.956264386456551e-06, "loss": 1.2134, "step": 3397 }, { "epoch": 0.71, "learning_rate": 7.945388384407518e-06, "loss": 1.244, "step": 3398 }, { "epoch": 0.71, "learning_rate": 7.934517978165211e-06, "loss": 1.2408, "step": 3399 }, { "epoch": 0.72, "learning_rate": 7.92365317277574e-06, "loss": 1.2673, "step": 3400 }, { "epoch": 0.72, "learning_rate": 7.912793973282584e-06, "loss": 1.2307, "step": 3401 }, { "epoch": 0.72, "learning_rate": 7.90194038472665e-06, "loss": 1.256, "step": 3402 }, { "epoch": 0.72, "learning_rate": 7.891092412146204e-06, "loss": 1.2364, "step": 3403 }, { "epoch": 0.72, "learning_rate": 7.880250060576943e-06, "loss": 1.2482, "step": 3404 }, { "epoch": 0.72, "learning_rate": 7.869413335051926e-06, "loss": 1.2059, "step": 3405 }, { "epoch": 0.72, "learning_rate": 7.85858224060162e-06, "loss": 1.2578, "step": 3406 }, { "epoch": 0.72, "learning_rate": 7.847756782253864e-06, "loss": 1.1665, "step": 3407 }, { "epoch": 0.72, "learning_rate": 7.836936965033881e-06, "loss": 1.2442, "step": 3408 }, { "epoch": 0.72, "learning_rate": 7.826122793964293e-06, "loss": 1.2477, "step": 3409 }, { "epoch": 0.72, "learning_rate": 7.815314274065074e-06, "loss": 1.2205, "step": 3410 }, { "epoch": 0.72, "learning_rate": 7.804511410353603e-06, "loss": 1.2614, "step": 3411 }, { "epoch": 0.72, "learning_rate": 7.793714207844616e-06, "loss": 1.2646, "step": 3412 }, { "epoch": 0.72, "learning_rate": 7.782922671550213e-06, "loss": 1.2035, "step": 3413 }, { "epoch": 0.72, "learning_rate": 7.772136806479891e-06, "loss": 1.1502, "step": 3414 }, { "epoch": 0.72, "learning_rate": 7.761356617640485e-06, "loss": 1.2283, "step": 3415 }, { "epoch": 0.72, "learning_rate": 7.750582110036225e-06, "loss": 1.2111, "step": 3416 }, { "epoch": 0.72, "learning_rate": 7.739813288668677e-06, "loss": 1.257, "step": 3417 }, { "epoch": 0.72, "learning_rate": 7.72905015853677e-06, "loss": 1.2329, "step": 3418 }, { "epoch": 0.72, "learning_rate": 7.718292724636815e-06, "loss": 1.2531, "step": 3419 }, { "epoch": 0.72, "learning_rate": 7.70754099196246e-06, "loss": 1.2305, "step": 3420 }, { "epoch": 0.72, "learning_rate": 7.696794965504695e-06, "loss": 1.2241, "step": 3421 }, { "epoch": 0.72, "learning_rate": 7.686054650251893e-06, "loss": 1.2656, "step": 3422 }, { "epoch": 0.72, "learning_rate": 7.675320051189746e-06, "loss": 1.1752, "step": 3423 }, { "epoch": 0.72, "learning_rate": 7.664591173301315e-06, "loss": 1.2414, "step": 3424 }, { "epoch": 0.72, "learning_rate": 7.65386802156698e-06, "loss": 1.2726, "step": 3425 }, { "epoch": 0.72, "learning_rate": 7.64315060096449e-06, "loss": 1.2527, "step": 3426 }, { "epoch": 0.72, "learning_rate": 7.632438916468928e-06, "loss": 1.2549, "step": 3427 }, { "epoch": 0.72, "learning_rate": 7.621732973052696e-06, "loss": 1.1833, "step": 3428 }, { "epoch": 0.72, "learning_rate": 7.611032775685541e-06, "loss": 1.2045, "step": 3429 }, { "epoch": 0.72, "learning_rate": 7.600338329334554e-06, "loss": 1.2253, "step": 3430 }, { "epoch": 0.72, "learning_rate": 7.5896496389641336e-06, "loss": 1.2404, "step": 3431 }, { "epoch": 0.72, "learning_rate": 7.5789667095360355e-06, "loss": 1.2634, "step": 3432 }, { "epoch": 0.72, "learning_rate": 7.568289546009316e-06, "loss": 1.2626, "step": 3433 }, { "epoch": 0.72, "learning_rate": 7.557618153340358e-06, "loss": 1.2234, "step": 3434 }, { "epoch": 0.72, "learning_rate": 7.546952536482888e-06, "loss": 1.2331, "step": 3435 }, { "epoch": 0.72, "learning_rate": 7.536292700387924e-06, "loss": 1.2757, "step": 3436 }, { "epoch": 0.72, "learning_rate": 7.5256386500038055e-06, "loss": 1.1817, "step": 3437 }, { "epoch": 0.72, "learning_rate": 7.5149903902762066e-06, "loss": 1.2456, "step": 3438 }, { "epoch": 0.72, "learning_rate": 7.504347926148086e-06, "loss": 1.2407, "step": 3439 }, { "epoch": 0.72, "learning_rate": 7.4937112625597375e-06, "loss": 1.2644, "step": 3440 }, { "epoch": 0.72, "learning_rate": 7.483080404448744e-06, "loss": 1.2944, "step": 3441 }, { "epoch": 0.72, "learning_rate": 7.472455356749992e-06, "loss": 1.2336, "step": 3442 }, { "epoch": 0.72, "learning_rate": 7.461836124395692e-06, "loss": 1.2524, "step": 3443 }, { "epoch": 0.72, "learning_rate": 7.451222712315325e-06, "loss": 1.2425, "step": 3444 }, { "epoch": 0.72, "learning_rate": 7.440615125435702e-06, "loss": 1.2094, "step": 3445 }, { "epoch": 0.72, "learning_rate": 7.430013368680908e-06, "loss": 1.2546, "step": 3446 }, { "epoch": 0.73, "learning_rate": 7.419417446972319e-06, "loss": 1.255, "step": 3447 }, { "epoch": 0.73, "learning_rate": 7.408827365228625e-06, "loss": 1.1557, "step": 3448 }, { "epoch": 0.73, "learning_rate": 7.3982431283657805e-06, "loss": 1.2077, "step": 3449 }, { "epoch": 0.73, "learning_rate": 7.38766474129704e-06, "loss": 1.2892, "step": 3450 }, { "epoch": 0.73, "learning_rate": 7.37709220893295e-06, "loss": 1.2841, "step": 3451 }, { "epoch": 0.73, "learning_rate": 7.3665255361813125e-06, "loss": 1.2524, "step": 3452 }, { "epoch": 0.73, "learning_rate": 7.355964727947242e-06, "loss": 1.2555, "step": 3453 }, { "epoch": 0.73, "learning_rate": 7.3454097891331085e-06, "loss": 1.2726, "step": 3454 }, { "epoch": 0.73, "learning_rate": 7.334860724638555e-06, "loss": 1.244, "step": 3455 }, { "epoch": 0.73, "learning_rate": 7.3243175393605215e-06, "loss": 1.2741, "step": 3456 }, { "epoch": 0.73, "learning_rate": 7.313780238193195e-06, "loss": 1.2557, "step": 3457 }, { "epoch": 0.73, "learning_rate": 7.303248826028036e-06, "loss": 1.1983, "step": 3458 }, { "epoch": 0.73, "learning_rate": 7.292723307753784e-06, "loss": 1.2621, "step": 3459 }, { "epoch": 0.73, "learning_rate": 7.282203688256422e-06, "loss": 1.2212, "step": 3460 }, { "epoch": 0.73, "learning_rate": 7.27168997241922e-06, "loss": 1.2561, "step": 3461 }, { "epoch": 0.73, "learning_rate": 7.261182165122689e-06, "loss": 1.27, "step": 3462 }, { "epoch": 0.73, "learning_rate": 7.250680271244593e-06, "loss": 1.2527, "step": 3463 }, { "epoch": 0.73, "learning_rate": 7.240184295659971e-06, "loss": 1.2466, "step": 3464 }, { "epoch": 0.73, "learning_rate": 7.229694243241097e-06, "loss": 1.2089, "step": 3465 }, { "epoch": 0.73, "learning_rate": 7.219210118857509e-06, "loss": 1.2399, "step": 3466 }, { "epoch": 0.73, "learning_rate": 7.208731927375982e-06, "loss": 1.2491, "step": 3467 }, { "epoch": 0.73, "learning_rate": 7.198259673660535e-06, "loss": 1.2486, "step": 3468 }, { "epoch": 0.73, "learning_rate": 7.187793362572451e-06, "loss": 1.2738, "step": 3469 }, { "epoch": 0.73, "learning_rate": 7.17733299897023e-06, "loss": 1.2042, "step": 3470 }, { "epoch": 0.73, "learning_rate": 7.166878587709618e-06, "loss": 1.2367, "step": 3471 }, { "epoch": 0.73, "learning_rate": 7.156430133643613e-06, "loss": 1.257, "step": 3472 }, { "epoch": 0.73, "learning_rate": 7.145987641622423e-06, "loss": 1.2147, "step": 3473 }, { "epoch": 0.73, "learning_rate": 7.1355511164935085e-06, "loss": 1.202, "step": 3474 }, { "epoch": 0.73, "learning_rate": 7.125120563101562e-06, "loss": 1.2162, "step": 3475 }, { "epoch": 0.73, "learning_rate": 7.114695986288476e-06, "loss": 1.2245, "step": 3476 }, { "epoch": 0.73, "learning_rate": 7.104277390893404e-06, "loss": 1.2367, "step": 3477 }, { "epoch": 0.73, "learning_rate": 7.0938647817527014e-06, "loss": 1.235, "step": 3478 }, { "epoch": 0.73, "learning_rate": 7.083458163699939e-06, "loss": 1.2113, "step": 3479 }, { "epoch": 0.73, "learning_rate": 7.073057541565933e-06, "loss": 1.2623, "step": 3480 }, { "epoch": 0.73, "learning_rate": 7.062662920178689e-06, "loss": 1.2144, "step": 3481 }, { "epoch": 0.73, "learning_rate": 7.052274304363449e-06, "loss": 1.2208, "step": 3482 }, { "epoch": 0.73, "learning_rate": 7.041891698942649e-06, "loss": 1.2248, "step": 3483 }, { "epoch": 0.73, "learning_rate": 7.03151510873594e-06, "loss": 1.2647, "step": 3484 }, { "epoch": 0.73, "learning_rate": 7.021144538560194e-06, "loss": 1.2, "step": 3485 }, { "epoch": 0.73, "learning_rate": 7.010779993229471e-06, "loss": 1.2185, "step": 3486 }, { "epoch": 0.73, "learning_rate": 7.000421477555038e-06, "loss": 1.2572, "step": 3487 }, { "epoch": 0.73, "learning_rate": 6.9900689963453734e-06, "loss": 1.2395, "step": 3488 }, { "epoch": 0.73, "learning_rate": 6.9797225544061385e-06, "loss": 1.2401, "step": 3489 }, { "epoch": 0.73, "learning_rate": 6.969382156540212e-06, "loss": 1.2825, "step": 3490 }, { "epoch": 0.73, "learning_rate": 6.9590478075476475e-06, "loss": 1.2534, "step": 3491 }, { "epoch": 0.73, "learning_rate": 6.9487195122256925e-06, "loss": 1.2292, "step": 3492 }, { "epoch": 0.73, "learning_rate": 6.9383972753688e-06, "loss": 1.2419, "step": 3493 }, { "epoch": 0.73, "learning_rate": 6.928081101768589e-06, "loss": 1.2715, "step": 3494 }, { "epoch": 0.74, "learning_rate": 6.9177709962138905e-06, "loss": 1.2377, "step": 3495 }, { "epoch": 0.74, "learning_rate": 6.907466963490692e-06, "loss": 1.2476, "step": 3496 }, { "epoch": 0.74, "learning_rate": 6.897169008382172e-06, "loss": 1.2539, "step": 3497 }, { "epoch": 0.74, "learning_rate": 6.88687713566869e-06, "loss": 1.2817, "step": 3498 }, { "epoch": 0.74, "learning_rate": 6.876591350127795e-06, "loss": 1.2159, "step": 3499 }, { "epoch": 0.74, "learning_rate": 6.866311656534177e-06, "loss": 1.2417, "step": 3500 }, { "epoch": 0.74, "learning_rate": 6.856038059659731e-06, "loss": 1.2626, "step": 3501 }, { "epoch": 0.74, "learning_rate": 6.8457705642734994e-06, "loss": 1.2404, "step": 3502 }, { "epoch": 0.74, "learning_rate": 6.835509175141713e-06, "loss": 1.2234, "step": 3503 }, { "epoch": 0.74, "learning_rate": 6.825253897027746e-06, "loss": 1.2569, "step": 3504 }, { "epoch": 0.74, "learning_rate": 6.815004734692146e-06, "loss": 1.2727, "step": 3505 }, { "epoch": 0.74, "learning_rate": 6.804761692892627e-06, "loss": 1.2193, "step": 3506 }, { "epoch": 0.74, "learning_rate": 6.794524776384059e-06, "loss": 1.2128, "step": 3507 }, { "epoch": 0.74, "learning_rate": 6.784293989918454e-06, "loss": 1.2532, "step": 3508 }, { "epoch": 0.74, "learning_rate": 6.774069338245002e-06, "loss": 1.2278, "step": 3509 }, { "epoch": 0.74, "learning_rate": 6.763850826110025e-06, "loss": 1.2912, "step": 3510 }, { "epoch": 0.74, "learning_rate": 6.753638458257017e-06, "loss": 1.2289, "step": 3511 }, { "epoch": 0.74, "learning_rate": 6.743432239426599e-06, "loss": 1.2061, "step": 3512 }, { "epoch": 0.74, "learning_rate": 6.733232174356537e-06, "loss": 1.182, "step": 3513 }, { "epoch": 0.74, "learning_rate": 6.723038267781763e-06, "loss": 1.2255, "step": 3514 }, { "epoch": 0.74, "learning_rate": 6.712850524434329e-06, "loss": 1.2829, "step": 3515 }, { "epoch": 0.74, "learning_rate": 6.7026689490434275e-06, "loss": 1.2193, "step": 3516 }, { "epoch": 0.74, "learning_rate": 6.692493546335404e-06, "loss": 1.2078, "step": 3517 }, { "epoch": 0.74, "learning_rate": 6.682324321033715e-06, "loss": 1.215, "step": 3518 }, { "epoch": 0.74, "learning_rate": 6.672161277858977e-06, "loss": 1.2736, "step": 3519 }, { "epoch": 0.74, "learning_rate": 6.662004421528909e-06, "loss": 1.1534, "step": 3520 }, { "epoch": 0.74, "learning_rate": 6.651853756758382e-06, "loss": 1.2636, "step": 3521 }, { "epoch": 0.74, "learning_rate": 6.641709288259368e-06, "loss": 1.2854, "step": 3522 }, { "epoch": 0.74, "learning_rate": 6.6315710207409925e-06, "loss": 1.2688, "step": 3523 }, { "epoch": 0.74, "learning_rate": 6.621438958909472e-06, "loss": 1.2235, "step": 3524 }, { "epoch": 0.74, "learning_rate": 6.6113131074681694e-06, "loss": 1.2121, "step": 3525 }, { "epoch": 0.74, "learning_rate": 6.6011934711175395e-06, "loss": 1.2806, "step": 3526 }, { "epoch": 0.74, "learning_rate": 6.591080054555177e-06, "loss": 1.2419, "step": 3527 }, { "epoch": 0.74, "learning_rate": 6.580972862475769e-06, "loss": 1.2342, "step": 3528 }, { "epoch": 0.74, "learning_rate": 6.570871899571119e-06, "loss": 1.2547, "step": 3529 }, { "epoch": 0.74, "learning_rate": 6.56077717053015e-06, "loss": 1.214, "step": 3530 }, { "epoch": 0.74, "learning_rate": 6.550688680038871e-06, "loss": 1.2362, "step": 3531 }, { "epoch": 0.74, "learning_rate": 6.5406064327804165e-06, "loss": 1.2625, "step": 3532 }, { "epoch": 0.74, "learning_rate": 6.5305304334350075e-06, "loss": 1.2325, "step": 3533 }, { "epoch": 0.74, "learning_rate": 6.520460686679964e-06, "loss": 1.2307, "step": 3534 }, { "epoch": 0.74, "learning_rate": 6.510397197189724e-06, "loss": 1.2151, "step": 3535 }, { "epoch": 0.74, "learning_rate": 6.500339969635794e-06, "loss": 1.1559, "step": 3536 }, { "epoch": 0.74, "learning_rate": 6.490289008686786e-06, "loss": 1.2368, "step": 3537 }, { "epoch": 0.74, "learning_rate": 6.480244319008411e-06, "loss": 1.2878, "step": 3538 }, { "epoch": 0.74, "learning_rate": 6.470205905263449e-06, "loss": 1.2264, "step": 3539 }, { "epoch": 0.74, "learning_rate": 6.460173772111791e-06, "loss": 1.2734, "step": 3540 }, { "epoch": 0.74, "learning_rate": 6.450147924210395e-06, "loss": 1.1985, "step": 3541 }, { "epoch": 0.75, "learning_rate": 6.440128366213297e-06, "loss": 1.227, "step": 3542 }, { "epoch": 0.75, "learning_rate": 6.430115102771637e-06, "loss": 1.2165, "step": 3543 }, { "epoch": 0.75, "learning_rate": 6.420108138533607e-06, "loss": 1.2112, "step": 3544 }, { "epoch": 0.75, "learning_rate": 6.410107478144496e-06, "loss": 1.2751, "step": 3545 }, { "epoch": 0.75, "learning_rate": 6.400113126246645e-06, "loss": 1.2595, "step": 3546 }, { "epoch": 0.75, "learning_rate": 6.390125087479493e-06, "loss": 1.2717, "step": 3547 }, { "epoch": 0.75, "learning_rate": 6.380143366479521e-06, "loss": 1.249, "step": 3548 }, { "epoch": 0.75, "learning_rate": 6.370167967880303e-06, "loss": 1.1961, "step": 3549 }, { "epoch": 0.75, "learning_rate": 6.360198896312451e-06, "loss": 1.2486, "step": 3550 }, { "epoch": 0.75, "learning_rate": 6.350236156403666e-06, "loss": 1.2977, "step": 3551 }, { "epoch": 0.75, "learning_rate": 6.3402797527786904e-06, "loss": 1.1871, "step": 3552 }, { "epoch": 0.75, "learning_rate": 6.330329690059342e-06, "loss": 1.3087, "step": 3553 }, { "epoch": 0.75, "learning_rate": 6.32038597286448e-06, "loss": 1.201, "step": 3554 }, { "epoch": 0.75, "learning_rate": 6.31044860581002e-06, "loss": 1.248, "step": 3555 }, { "epoch": 0.75, "learning_rate": 6.300517593508944e-06, "loss": 1.1977, "step": 3556 }, { "epoch": 0.75, "learning_rate": 6.290592940571269e-06, "loss": 1.2783, "step": 3557 }, { "epoch": 0.75, "learning_rate": 6.280674651604059e-06, "loss": 1.2428, "step": 3558 }, { "epoch": 0.75, "learning_rate": 6.270762731211442e-06, "loss": 1.2188, "step": 3559 }, { "epoch": 0.75, "learning_rate": 6.260857183994564e-06, "loss": 1.2497, "step": 3560 }, { "epoch": 0.75, "learning_rate": 6.25095801455164e-06, "loss": 1.2437, "step": 3561 }, { "epoch": 0.75, "learning_rate": 6.241065227477905e-06, "loss": 1.2495, "step": 3562 }, { "epoch": 0.75, "learning_rate": 6.231178827365627e-06, "loss": 1.2344, "step": 3563 }, { "epoch": 0.75, "learning_rate": 6.221298818804136e-06, "loss": 1.2486, "step": 3564 }, { "epoch": 0.75, "learning_rate": 6.211425206379769e-06, "loss": 1.225, "step": 3565 }, { "epoch": 0.75, "learning_rate": 6.201557994675895e-06, "loss": 1.2596, "step": 3566 }, { "epoch": 0.75, "learning_rate": 6.191697188272933e-06, "loss": 1.2276, "step": 3567 }, { "epoch": 0.75, "learning_rate": 6.181842791748307e-06, "loss": 1.2601, "step": 3568 }, { "epoch": 0.75, "learning_rate": 6.17199480967648e-06, "loss": 1.2197, "step": 3569 }, { "epoch": 0.75, "learning_rate": 6.162153246628921e-06, "loss": 1.2276, "step": 3570 }, { "epoch": 0.75, "learning_rate": 6.152318107174144e-06, "loss": 1.2539, "step": 3571 }, { "epoch": 0.75, "learning_rate": 6.142489395877651e-06, "loss": 1.2383, "step": 3572 }, { "epoch": 0.75, "learning_rate": 6.132667117301989e-06, "loss": 1.2276, "step": 3573 }, { "epoch": 0.75, "learning_rate": 6.122851276006692e-06, "loss": 1.2338, "step": 3574 }, { "epoch": 0.75, "learning_rate": 6.113041876548333e-06, "loss": 1.2444, "step": 3575 }, { "epoch": 0.75, "learning_rate": 6.103238923480468e-06, "loss": 1.2727, "step": 3576 }, { "epoch": 0.75, "learning_rate": 6.093442421353683e-06, "loss": 1.2161, "step": 3577 }, { "epoch": 0.75, "learning_rate": 6.083652374715561e-06, "loss": 1.2587, "step": 3578 }, { "epoch": 0.75, "learning_rate": 6.073868788110673e-06, "loss": 1.2926, "step": 3579 }, { "epoch": 0.75, "learning_rate": 6.064091666080621e-06, "loss": 1.2441, "step": 3580 }, { "epoch": 0.75, "learning_rate": 6.054321013163978e-06, "loss": 1.2681, "step": 3581 }, { "epoch": 0.75, "learning_rate": 6.044556833896338e-06, "loss": 1.2571, "step": 3582 }, { "epoch": 0.75, "learning_rate": 6.034799132810274e-06, "loss": 1.2116, "step": 3583 }, { "epoch": 0.75, "learning_rate": 6.025047914435349e-06, "loss": 1.2729, "step": 3584 }, { "epoch": 0.75, "learning_rate": 6.015303183298135e-06, "loss": 1.2212, "step": 3585 }, { "epoch": 0.75, "learning_rate": 6.005564943922179e-06, "loss": 1.2731, "step": 3586 }, { "epoch": 0.75, "learning_rate": 5.995833200828007e-06, "loss": 1.2619, "step": 3587 }, { "epoch": 0.75, "learning_rate": 5.9861079585331535e-06, "loss": 1.2284, "step": 3588 }, { "epoch": 0.75, "learning_rate": 5.97638922155211e-06, "loss": 1.2559, "step": 3589 }, { "epoch": 0.76, "learning_rate": 5.9666769943963675e-06, "loss": 1.3126, "step": 3590 }, { "epoch": 0.76, "learning_rate": 5.956971281574386e-06, "loss": 1.2676, "step": 3591 }, { "epoch": 0.76, "learning_rate": 5.947272087591591e-06, "loss": 1.2489, "step": 3592 }, { "epoch": 0.76, "learning_rate": 5.9375794169504095e-06, "loss": 1.2458, "step": 3593 }, { "epoch": 0.76, "learning_rate": 5.927893274150214e-06, "loss": 1.192, "step": 3594 }, { "epoch": 0.76, "learning_rate": 5.918213663687362e-06, "loss": 1.2844, "step": 3595 }, { "epoch": 0.76, "learning_rate": 5.908540590055168e-06, "loss": 1.2122, "step": 3596 }, { "epoch": 0.76, "learning_rate": 5.898874057743926e-06, "loss": 1.2162, "step": 3597 }, { "epoch": 0.76, "learning_rate": 5.889214071240876e-06, "loss": 1.2739, "step": 3598 }, { "epoch": 0.76, "learning_rate": 5.879560635030242e-06, "loss": 1.3043, "step": 3599 }, { "epoch": 0.76, "learning_rate": 5.869913753593175e-06, "loss": 1.2569, "step": 3600 }, { "epoch": 0.76, "learning_rate": 5.860273431407821e-06, "loss": 1.1974, "step": 3601 }, { "epoch": 0.76, "learning_rate": 5.8506396729492455e-06, "loss": 1.236, "step": 3602 }, { "epoch": 0.76, "learning_rate": 5.841012482689501e-06, "loss": 1.2349, "step": 3603 }, { "epoch": 0.76, "learning_rate": 5.831391865097564e-06, "loss": 1.191, "step": 3604 }, { "epoch": 0.76, "learning_rate": 5.821777824639365e-06, "loss": 1.2687, "step": 3605 }, { "epoch": 0.76, "learning_rate": 5.812170365777801e-06, "loss": 1.2637, "step": 3606 }, { "epoch": 0.76, "learning_rate": 5.802569492972687e-06, "loss": 1.2613, "step": 3607 }, { "epoch": 0.76, "learning_rate": 5.792975210680793e-06, "loss": 1.2483, "step": 3608 }, { "epoch": 0.76, "learning_rate": 5.783387523355839e-06, "loss": 1.2237, "step": 3609 }, { "epoch": 0.76, "learning_rate": 5.773806435448459e-06, "loss": 1.2205, "step": 3610 }, { "epoch": 0.76, "learning_rate": 5.764231951406256e-06, "loss": 1.2307, "step": 3611 }, { "epoch": 0.76, "learning_rate": 5.75466407567374e-06, "loss": 1.2022, "step": 3612 }, { "epoch": 0.76, "learning_rate": 5.745102812692358e-06, "loss": 1.194, "step": 3613 }, { "epoch": 0.76, "learning_rate": 5.735548166900506e-06, "loss": 1.232, "step": 3614 }, { "epoch": 0.76, "learning_rate": 5.72600014273349e-06, "loss": 1.2266, "step": 3615 }, { "epoch": 0.76, "learning_rate": 5.716458744623536e-06, "loss": 1.223, "step": 3616 }, { "epoch": 0.76, "learning_rate": 5.706923976999825e-06, "loss": 1.2352, "step": 3617 }, { "epoch": 0.76, "learning_rate": 5.697395844288423e-06, "loss": 1.2387, "step": 3618 }, { "epoch": 0.76, "learning_rate": 5.687874350912346e-06, "loss": 1.2801, "step": 3619 }, { "epoch": 0.76, "learning_rate": 5.678359501291504e-06, "loss": 1.2382, "step": 3620 }, { "epoch": 0.76, "learning_rate": 5.668851299842739e-06, "loss": 1.2551, "step": 3621 }, { "epoch": 0.76, "learning_rate": 5.659349750979814e-06, "loss": 1.2399, "step": 3622 }, { "epoch": 0.76, "learning_rate": 5.6498548591133725e-06, "loss": 1.2322, "step": 3623 }, { "epoch": 0.76, "learning_rate": 5.6403666286510065e-06, "loss": 1.2039, "step": 3624 }, { "epoch": 0.76, "learning_rate": 5.630885063997187e-06, "loss": 1.2529, "step": 3625 }, { "epoch": 0.76, "learning_rate": 5.621410169553292e-06, "loss": 1.2361, "step": 3626 }, { "epoch": 0.76, "learning_rate": 5.6119419497176275e-06, "loss": 1.192, "step": 3627 }, { "epoch": 0.76, "learning_rate": 5.6024804088853775e-06, "loss": 1.2415, "step": 3628 }, { "epoch": 0.76, "learning_rate": 5.59302555144863e-06, "loss": 1.2426, "step": 3629 }, { "epoch": 0.76, "learning_rate": 5.5835773817963814e-06, "loss": 1.2766, "step": 3630 }, { "epoch": 0.76, "learning_rate": 5.574135904314504e-06, "loss": 1.253, "step": 3631 }, { "epoch": 0.76, "learning_rate": 5.5647011233857915e-06, "loss": 1.205, "step": 3632 }, { "epoch": 0.76, "learning_rate": 5.555273043389906e-06, "loss": 1.2142, "step": 3633 }, { "epoch": 0.76, "learning_rate": 5.545851668703397e-06, "loss": 1.2438, "step": 3634 }, { "epoch": 0.76, "learning_rate": 5.536437003699724e-06, "loss": 1.2575, "step": 3635 }, { "epoch": 0.76, "learning_rate": 5.527029052749216e-06, "loss": 1.1944, "step": 3636 }, { "epoch": 0.76, "learning_rate": 5.517627820219076e-06, "loss": 1.2532, "step": 3637 }, { "epoch": 0.77, "learning_rate": 5.508233310473412e-06, "loss": 1.2462, "step": 3638 }, { "epoch": 0.77, "learning_rate": 5.498845527873193e-06, "loss": 1.2677, "step": 3639 }, { "epoch": 0.77, "learning_rate": 5.489464476776276e-06, "loss": 1.2286, "step": 3640 }, { "epoch": 0.77, "learning_rate": 5.480090161537388e-06, "loss": 1.196, "step": 3641 }, { "epoch": 0.77, "learning_rate": 5.470722586508122e-06, "loss": 1.2695, "step": 3642 }, { "epoch": 0.77, "learning_rate": 5.4613617560369625e-06, "loss": 1.2407, "step": 3643 }, { "epoch": 0.77, "learning_rate": 5.452007674469235e-06, "loss": 1.2691, "step": 3644 }, { "epoch": 0.77, "learning_rate": 5.442660346147157e-06, "loss": 1.2453, "step": 3645 }, { "epoch": 0.77, "learning_rate": 5.433319775409807e-06, "loss": 1.2642, "step": 3646 }, { "epoch": 0.77, "learning_rate": 5.4239859665931105e-06, "loss": 1.254, "step": 3647 }, { "epoch": 0.77, "learning_rate": 5.4146589240298745e-06, "loss": 1.2766, "step": 3648 }, { "epoch": 0.77, "learning_rate": 5.405338652049749e-06, "loss": 1.2345, "step": 3649 }, { "epoch": 0.77, "learning_rate": 5.396025154979247e-06, "loss": 1.179, "step": 3650 }, { "epoch": 0.77, "learning_rate": 5.386718437141743e-06, "loss": 1.1928, "step": 3651 }, { "epoch": 0.77, "learning_rate": 5.37741850285745e-06, "loss": 1.2355, "step": 3652 }, { "epoch": 0.77, "learning_rate": 5.368125356443452e-06, "loss": 1.2487, "step": 3653 }, { "epoch": 0.77, "learning_rate": 5.358839002213665e-06, "loss": 1.2252, "step": 3654 }, { "epoch": 0.77, "learning_rate": 5.349559444478849e-06, "loss": 1.2632, "step": 3655 }, { "epoch": 0.77, "learning_rate": 5.3402866875466344e-06, "loss": 1.2128, "step": 3656 }, { "epoch": 0.77, "learning_rate": 5.331020735721469e-06, "loss": 1.2661, "step": 3657 }, { "epoch": 0.77, "learning_rate": 5.321761593304646e-06, "loss": 1.2443, "step": 3658 }, { "epoch": 0.77, "learning_rate": 5.312509264594312e-06, "loss": 1.1673, "step": 3659 }, { "epoch": 0.77, "learning_rate": 5.303263753885433e-06, "loss": 1.2504, "step": 3660 }, { "epoch": 0.77, "learning_rate": 5.294025065469827e-06, "loss": 1.2509, "step": 3661 }, { "epoch": 0.77, "learning_rate": 5.284793203636132e-06, "loss": 1.3018, "step": 3662 }, { "epoch": 0.77, "learning_rate": 5.2755681726698134e-06, "loss": 1.2068, "step": 3663 }, { "epoch": 0.77, "learning_rate": 5.2663499768531865e-06, "loss": 1.2447, "step": 3664 }, { "epoch": 0.77, "learning_rate": 5.257138620465374e-06, "loss": 1.1832, "step": 3665 }, { "epoch": 0.77, "learning_rate": 5.247934107782324e-06, "loss": 1.2177, "step": 3666 }, { "epoch": 0.77, "learning_rate": 5.238736443076828e-06, "loss": 1.2891, "step": 3667 }, { "epoch": 0.77, "learning_rate": 5.2295456306184715e-06, "loss": 1.2142, "step": 3668 }, { "epoch": 0.77, "learning_rate": 5.220361674673677e-06, "loss": 1.2576, "step": 3669 }, { "epoch": 0.77, "learning_rate": 5.211184579505688e-06, "loss": 1.2489, "step": 3670 }, { "epoch": 0.77, "learning_rate": 5.2020143493745425e-06, "loss": 1.2028, "step": 3671 }, { "epoch": 0.77, "learning_rate": 5.192850988537115e-06, "loss": 1.2788, "step": 3672 }, { "epoch": 0.77, "learning_rate": 5.183694501247072e-06, "loss": 1.2287, "step": 3673 }, { "epoch": 0.77, "learning_rate": 5.174544891754896e-06, "loss": 1.2406, "step": 3674 }, { "epoch": 0.77, "learning_rate": 5.165402164307884e-06, "loss": 1.2041, "step": 3675 }, { "epoch": 0.77, "learning_rate": 5.15626632315013e-06, "loss": 1.257, "step": 3676 }, { "epoch": 0.77, "learning_rate": 5.147137372522537e-06, "loss": 1.2025, "step": 3677 }, { "epoch": 0.77, "learning_rate": 5.138015316662803e-06, "loss": 1.2163, "step": 3678 }, { "epoch": 0.77, "learning_rate": 5.128900159805425e-06, "loss": 1.2846, "step": 3679 }, { "epoch": 0.77, "learning_rate": 5.119791906181713e-06, "loss": 1.2558, "step": 3680 }, { "epoch": 0.77, "learning_rate": 5.110690560019744e-06, "loss": 1.2866, "step": 3681 }, { "epoch": 0.77, "learning_rate": 5.1015961255444235e-06, "loss": 1.2177, "step": 3682 }, { "epoch": 0.77, "learning_rate": 5.09250860697742e-06, "loss": 1.2401, "step": 3683 }, { "epoch": 0.77, "learning_rate": 5.083428008537197e-06, "loss": 1.259, "step": 3684 }, { "epoch": 0.78, "learning_rate": 5.074354334439022e-06, "loss": 1.2806, "step": 3685 }, { "epoch": 0.78, "learning_rate": 5.065287588894933e-06, "loss": 1.1889, "step": 3686 }, { "epoch": 0.78, "learning_rate": 5.056227776113747e-06, "loss": 1.2237, "step": 3687 }, { "epoch": 0.78, "learning_rate": 5.0471749003010835e-06, "loss": 1.2335, "step": 3688 }, { "epoch": 0.78, "learning_rate": 5.038128965659317e-06, "loss": 1.2801, "step": 3689 }, { "epoch": 0.78, "learning_rate": 5.029089976387627e-06, "loss": 1.2576, "step": 3690 }, { "epoch": 0.78, "learning_rate": 5.020057936681939e-06, "loss": 1.2338, "step": 3691 }, { "epoch": 0.78, "learning_rate": 5.011032850734983e-06, "loss": 1.2521, "step": 3692 }, { "epoch": 0.78, "learning_rate": 5.00201472273623e-06, "loss": 1.2056, "step": 3693 }, { "epoch": 0.78, "learning_rate": 4.993003556871954e-06, "loss": 1.2287, "step": 3694 }, { "epoch": 0.78, "learning_rate": 4.983999357325164e-06, "loss": 1.1885, "step": 3695 }, { "epoch": 0.78, "learning_rate": 4.975002128275666e-06, "loss": 1.2171, "step": 3696 }, { "epoch": 0.78, "learning_rate": 4.966011873900001e-06, "loss": 1.2356, "step": 3697 }, { "epoch": 0.78, "learning_rate": 4.957028598371498e-06, "loss": 1.2419, "step": 3698 }, { "epoch": 0.78, "learning_rate": 4.948052305860233e-06, "loss": 1.2049, "step": 3699 }, { "epoch": 0.78, "learning_rate": 4.939083000533036e-06, "loss": 1.2135, "step": 3700 }, { "epoch": 0.78, "learning_rate": 4.93012068655351e-06, "loss": 1.1876, "step": 3701 }, { "epoch": 0.78, "learning_rate": 4.92116536808199e-06, "loss": 1.2686, "step": 3702 }, { "epoch": 0.78, "learning_rate": 4.912217049275594e-06, "loss": 1.2107, "step": 3703 }, { "epoch": 0.78, "learning_rate": 4.903275734288162e-06, "loss": 1.2027, "step": 3704 }, { "epoch": 0.78, "learning_rate": 4.8943414272702886e-06, "loss": 1.2154, "step": 3705 }, { "epoch": 0.78, "learning_rate": 4.885414132369335e-06, "loss": 1.2347, "step": 3706 }, { "epoch": 0.78, "learning_rate": 4.876493853729385e-06, "loss": 1.218, "step": 3707 }, { "epoch": 0.78, "learning_rate": 4.867580595491268e-06, "loss": 1.2118, "step": 3708 }, { "epoch": 0.78, "learning_rate": 4.858674361792571e-06, "loss": 1.2323, "step": 3709 }, { "epoch": 0.78, "learning_rate": 4.849775156767598e-06, "loss": 1.2283, "step": 3710 }, { "epoch": 0.78, "learning_rate": 4.840882984547415e-06, "loss": 1.2607, "step": 3711 }, { "epoch": 0.78, "learning_rate": 4.8319978492598e-06, "loss": 1.2752, "step": 3712 }, { "epoch": 0.78, "learning_rate": 4.823119755029271e-06, "loss": 1.2268, "step": 3713 }, { "epoch": 0.78, "learning_rate": 4.814248705977092e-06, "loss": 1.2078, "step": 3714 }, { "epoch": 0.78, "learning_rate": 4.805384706221232e-06, "loss": 1.2239, "step": 3715 }, { "epoch": 0.78, "learning_rate": 4.796527759876415e-06, "loss": 1.199, "step": 3716 }, { "epoch": 0.78, "learning_rate": 4.787677871054062e-06, "loss": 1.2697, "step": 3717 }, { "epoch": 0.78, "learning_rate": 4.7788350438623465e-06, "loss": 1.2864, "step": 3718 }, { "epoch": 0.78, "learning_rate": 4.769999282406137e-06, "loss": 1.2448, "step": 3719 }, { "epoch": 0.78, "learning_rate": 4.7611705907870474e-06, "loss": 1.2648, "step": 3720 }, { "epoch": 0.78, "learning_rate": 4.7523489731033845e-06, "loss": 1.2404, "step": 3721 }, { "epoch": 0.78, "learning_rate": 4.743534433450199e-06, "loss": 1.2325, "step": 3722 }, { "epoch": 0.78, "learning_rate": 4.734726975919233e-06, "loss": 1.2223, "step": 3723 }, { "epoch": 0.78, "learning_rate": 4.725926604598942e-06, "loss": 1.2159, "step": 3724 }, { "epoch": 0.78, "learning_rate": 4.7171333235745145e-06, "loss": 1.24, "step": 3725 }, { "epoch": 0.78, "learning_rate": 4.708347136927818e-06, "loss": 1.2243, "step": 3726 }, { "epoch": 0.78, "learning_rate": 4.699568048737453e-06, "loss": 1.2913, "step": 3727 }, { "epoch": 0.78, "learning_rate": 4.690796063078709e-06, "loss": 1.1972, "step": 3728 }, { "epoch": 0.78, "learning_rate": 4.6820311840235745e-06, "loss": 1.2271, "step": 3729 }, { "epoch": 0.78, "learning_rate": 4.67327341564076e-06, "loss": 1.2028, "step": 3730 }, { "epoch": 0.78, "learning_rate": 4.6645227619956515e-06, "loss": 1.2191, "step": 3731 }, { "epoch": 0.78, "learning_rate": 4.655779227150352e-06, "loss": 1.2429, "step": 3732 }, { "epoch": 0.79, "learning_rate": 4.647042815163649e-06, "loss": 1.1935, "step": 3733 }, { "epoch": 0.79, "learning_rate": 4.638313530091016e-06, "loss": 1.2538, "step": 3734 }, { "epoch": 0.79, "learning_rate": 4.629591375984641e-06, "loss": 1.2513, "step": 3735 }, { "epoch": 0.79, "learning_rate": 4.620876356893385e-06, "loss": 1.204, "step": 3736 }, { "epoch": 0.79, "learning_rate": 4.612168476862789e-06, "loss": 1.2585, "step": 3737 }, { "epoch": 0.79, "learning_rate": 4.603467739935108e-06, "loss": 1.2116, "step": 3738 }, { "epoch": 0.79, "learning_rate": 4.594774150149251e-06, "loss": 1.2384, "step": 3739 }, { "epoch": 0.79, "learning_rate": 4.586087711540832e-06, "loss": 1.2116, "step": 3740 }, { "epoch": 0.79, "learning_rate": 4.57740842814213e-06, "loss": 1.2345, "step": 3741 }, { "epoch": 0.79, "learning_rate": 4.568736303982115e-06, "loss": 1.2151, "step": 3742 }, { "epoch": 0.79, "learning_rate": 4.560071343086421e-06, "loss": 1.2314, "step": 3743 }, { "epoch": 0.79, "learning_rate": 4.55141354947737e-06, "loss": 1.2186, "step": 3744 }, { "epoch": 0.79, "learning_rate": 4.542762927173941e-06, "loss": 1.2214, "step": 3745 }, { "epoch": 0.79, "learning_rate": 4.534119480191801e-06, "loss": 1.1697, "step": 3746 }, { "epoch": 0.79, "learning_rate": 4.525483212543273e-06, "loss": 1.2306, "step": 3747 }, { "epoch": 0.79, "learning_rate": 4.516854128237358e-06, "loss": 1.2209, "step": 3748 }, { "epoch": 0.79, "learning_rate": 4.5082322312797166e-06, "loss": 1.259, "step": 3749 }, { "epoch": 0.79, "learning_rate": 4.499617525672664e-06, "loss": 1.22, "step": 3750 }, { "epoch": 0.79, "learning_rate": 4.491010015415198e-06, "loss": 1.2254, "step": 3751 }, { "epoch": 0.79, "learning_rate": 4.4824097045029615e-06, "loss": 1.1593, "step": 3752 }, { "epoch": 0.79, "learning_rate": 4.473816596928251e-06, "loss": 1.2386, "step": 3753 }, { "epoch": 0.79, "learning_rate": 4.465230696680038e-06, "loss": 1.1959, "step": 3754 }, { "epoch": 0.79, "learning_rate": 4.456652007743929e-06, "loss": 1.2521, "step": 3755 }, { "epoch": 0.79, "learning_rate": 4.448080534102202e-06, "loss": 1.2475, "step": 3756 }, { "epoch": 0.79, "learning_rate": 4.439516279733764e-06, "loss": 1.2411, "step": 3757 }, { "epoch": 0.79, "learning_rate": 4.430959248614184e-06, "loss": 1.2321, "step": 3758 }, { "epoch": 0.79, "learning_rate": 4.42240944471568e-06, "loss": 1.2352, "step": 3759 }, { "epoch": 0.79, "learning_rate": 4.413866872007104e-06, "loss": 1.1946, "step": 3760 }, { "epoch": 0.79, "learning_rate": 4.405331534453967e-06, "loss": 1.2099, "step": 3761 }, { "epoch": 0.79, "learning_rate": 4.396803436018406e-06, "loss": 1.2542, "step": 3762 }, { "epoch": 0.79, "learning_rate": 4.3882825806592024e-06, "loss": 1.1668, "step": 3763 }, { "epoch": 0.79, "learning_rate": 4.379768972331784e-06, "loss": 1.3, "step": 3764 }, { "epoch": 0.79, "learning_rate": 4.371262614988196e-06, "loss": 1.275, "step": 3765 }, { "epoch": 0.79, "learning_rate": 4.362763512577144e-06, "loss": 1.2259, "step": 3766 }, { "epoch": 0.79, "learning_rate": 4.354271669043934e-06, "loss": 1.1798, "step": 3767 }, { "epoch": 0.79, "learning_rate": 4.345787088330537e-06, "loss": 1.2543, "step": 3768 }, { "epoch": 0.79, "learning_rate": 4.33730977437552e-06, "loss": 1.2457, "step": 3769 }, { "epoch": 0.79, "learning_rate": 4.328839731114101e-06, "loss": 1.2836, "step": 3770 }, { "epoch": 0.79, "learning_rate": 4.3203769624781055e-06, "loss": 1.2497, "step": 3771 }, { "epoch": 0.79, "learning_rate": 4.311921472395999e-06, "loss": 1.2518, "step": 3772 }, { "epoch": 0.79, "learning_rate": 4.303473264792857e-06, "loss": 1.2356, "step": 3773 }, { "epoch": 0.79, "learning_rate": 4.295032343590366e-06, "loss": 1.2162, "step": 3774 }, { "epoch": 0.79, "learning_rate": 4.286598712706858e-06, "loss": 1.243, "step": 3775 }, { "epoch": 0.79, "learning_rate": 4.278172376057246e-06, "loss": 1.2294, "step": 3776 }, { "epoch": 0.79, "learning_rate": 4.269753337553091e-06, "loss": 1.2745, "step": 3777 }, { "epoch": 0.79, "learning_rate": 4.2613416011025424e-06, "loss": 1.2469, "step": 3778 }, { "epoch": 0.79, "learning_rate": 4.25293717061036e-06, "loss": 1.2469, "step": 3779 }, { "epoch": 0.8, "learning_rate": 4.244540049977934e-06, "loss": 1.2199, "step": 3780 }, { "epoch": 0.8, "learning_rate": 4.236150243103234e-06, "loss": 1.2239, "step": 3781 }, { "epoch": 0.8, "learning_rate": 4.227767753880861e-06, "loss": 1.2538, "step": 3782 }, { "epoch": 0.8, "learning_rate": 4.2193925862019934e-06, "loss": 1.2101, "step": 3783 }, { "epoch": 0.8, "learning_rate": 4.211024743954424e-06, "loss": 1.2115, "step": 3784 }, { "epoch": 0.8, "learning_rate": 4.2026642310225505e-06, "loss": 1.2154, "step": 3785 }, { "epoch": 0.8, "learning_rate": 4.194311051287359e-06, "loss": 1.2042, "step": 3786 }, { "epoch": 0.8, "learning_rate": 4.185965208626428e-06, "loss": 1.2834, "step": 3787 }, { "epoch": 0.8, "learning_rate": 4.177626706913948e-06, "loss": 1.2421, "step": 3788 }, { "epoch": 0.8, "learning_rate": 4.16929555002068e-06, "loss": 1.2578, "step": 3789 }, { "epoch": 0.8, "learning_rate": 4.160971741813995e-06, "loss": 1.1894, "step": 3790 }, { "epoch": 0.8, "learning_rate": 4.152655286157834e-06, "loss": 1.2586, "step": 3791 }, { "epoch": 0.8, "learning_rate": 4.144346186912738e-06, "loss": 1.2576, "step": 3792 }, { "epoch": 0.8, "learning_rate": 4.136044447935837e-06, "loss": 1.3064, "step": 3793 }, { "epoch": 0.8, "learning_rate": 4.127750073080829e-06, "loss": 1.2095, "step": 3794 }, { "epoch": 0.8, "learning_rate": 4.119463066197997e-06, "loss": 1.2668, "step": 3795 }, { "epoch": 0.8, "learning_rate": 4.111183431134223e-06, "loss": 1.2512, "step": 3796 }, { "epoch": 0.8, "learning_rate": 4.102911171732933e-06, "loss": 1.2132, "step": 3797 }, { "epoch": 0.8, "learning_rate": 4.094646291834166e-06, "loss": 1.2339, "step": 3798 }, { "epoch": 0.8, "learning_rate": 4.086388795274508e-06, "loss": 1.2086, "step": 3799 }, { "epoch": 0.8, "learning_rate": 4.078138685887125e-06, "loss": 1.2069, "step": 3800 }, { "epoch": 0.8, "learning_rate": 4.069895967501765e-06, "loss": 1.1852, "step": 3801 }, { "epoch": 0.8, "learning_rate": 4.0616606439447315e-06, "loss": 1.2256, "step": 3802 }, { "epoch": 0.8, "learning_rate": 4.053432719038895e-06, "loss": 1.2168, "step": 3803 }, { "epoch": 0.8, "learning_rate": 4.045212196603705e-06, "loss": 1.2169, "step": 3804 }, { "epoch": 0.8, "learning_rate": 4.03699908045516e-06, "loss": 1.2411, "step": 3805 }, { "epoch": 0.8, "learning_rate": 4.028793374405833e-06, "loss": 1.2408, "step": 3806 }, { "epoch": 0.8, "learning_rate": 4.020595082264847e-06, "loss": 1.2576, "step": 3807 }, { "epoch": 0.8, "learning_rate": 4.012404207837881e-06, "loss": 1.2201, "step": 3808 }, { "epoch": 0.8, "learning_rate": 4.0042207549271905e-06, "loss": 1.2566, "step": 3809 }, { "epoch": 0.8, "learning_rate": 3.996044727331558e-06, "loss": 1.2368, "step": 3810 }, { "epoch": 0.8, "learning_rate": 3.987876128846349e-06, "loss": 1.2535, "step": 3811 }, { "epoch": 0.8, "learning_rate": 3.979714963263455e-06, "loss": 1.2385, "step": 3812 }, { "epoch": 0.8, "learning_rate": 3.971561234371324e-06, "loss": 1.2306, "step": 3813 }, { "epoch": 0.8, "learning_rate": 3.963414945954962e-06, "loss": 1.2366, "step": 3814 }, { "epoch": 0.8, "learning_rate": 3.955276101795908e-06, "loss": 1.2534, "step": 3815 }, { "epoch": 0.8, "learning_rate": 3.947144705672257e-06, "loss": 1.2208, "step": 3816 }, { "epoch": 0.8, "learning_rate": 3.939020761358641e-06, "loss": 1.241, "step": 3817 }, { "epoch": 0.8, "learning_rate": 3.930904272626226e-06, "loss": 1.2647, "step": 3818 }, { "epoch": 0.8, "learning_rate": 3.922795243242734e-06, "loss": 1.2261, "step": 3819 }, { "epoch": 0.8, "learning_rate": 3.914693676972408e-06, "loss": 1.2204, "step": 3820 }, { "epoch": 0.8, "learning_rate": 3.906599577576027e-06, "loss": 1.1952, "step": 3821 }, { "epoch": 0.8, "learning_rate": 3.898512948810922e-06, "loss": 1.2485, "step": 3822 }, { "epoch": 0.8, "learning_rate": 3.890433794430934e-06, "loss": 1.2584, "step": 3823 }, { "epoch": 0.8, "learning_rate": 3.882362118186445e-06, "loss": 1.2613, "step": 3824 }, { "epoch": 0.8, "learning_rate": 3.87429792382437e-06, "loss": 1.2003, "step": 3825 }, { "epoch": 0.8, "learning_rate": 3.86624121508814e-06, "loss": 1.246, "step": 3826 }, { "epoch": 0.8, "learning_rate": 3.858191995717722e-06, "loss": 1.2902, "step": 3827 }, { "epoch": 0.81, "learning_rate": 3.850150269449597e-06, "loss": 1.2454, "step": 3828 }, { "epoch": 0.81, "learning_rate": 3.84211604001677e-06, "loss": 1.2498, "step": 3829 }, { "epoch": 0.81, "learning_rate": 3.834089311148774e-06, "loss": 1.2363, "step": 3830 }, { "epoch": 0.81, "learning_rate": 3.826070086571651e-06, "loss": 1.2316, "step": 3831 }, { "epoch": 0.81, "learning_rate": 3.818058370007956e-06, "loss": 1.2111, "step": 3832 }, { "epoch": 0.81, "learning_rate": 3.810054165176775e-06, "loss": 1.2284, "step": 3833 }, { "epoch": 0.81, "learning_rate": 3.802057475793688e-06, "loss": 1.1595, "step": 3834 }, { "epoch": 0.81, "learning_rate": 3.794068305570804e-06, "loss": 1.2422, "step": 3835 }, { "epoch": 0.81, "learning_rate": 3.78608665821673e-06, "loss": 1.2094, "step": 3836 }, { "epoch": 0.81, "learning_rate": 3.778112537436578e-06, "loss": 1.2318, "step": 3837 }, { "epoch": 0.81, "learning_rate": 3.7701459469319824e-06, "loss": 1.268, "step": 3838 }, { "epoch": 0.81, "learning_rate": 3.7621868904010585e-06, "loss": 1.2058, "step": 3839 }, { "epoch": 0.81, "learning_rate": 3.7542353715384462e-06, "loss": 1.2071, "step": 3840 }, { "epoch": 0.81, "learning_rate": 3.7462913940352797e-06, "loss": 1.2856, "step": 3841 }, { "epoch": 0.81, "learning_rate": 3.7383549615791826e-06, "loss": 1.275, "step": 3842 }, { "epoch": 0.81, "learning_rate": 3.7304260778542924e-06, "loss": 1.2445, "step": 3843 }, { "epoch": 0.81, "learning_rate": 3.722504746541229e-06, "loss": 1.2283, "step": 3844 }, { "epoch": 0.81, "learning_rate": 3.714590971317107e-06, "loss": 1.2413, "step": 3845 }, { "epoch": 0.81, "learning_rate": 3.706684755855545e-06, "loss": 1.2882, "step": 3846 }, { "epoch": 0.81, "learning_rate": 3.698786103826639e-06, "loss": 1.2922, "step": 3847 }, { "epoch": 0.81, "learning_rate": 3.690895018896987e-06, "loss": 1.2429, "step": 3848 }, { "epoch": 0.81, "learning_rate": 3.6830115047296633e-06, "loss": 1.2325, "step": 3849 }, { "epoch": 0.81, "learning_rate": 3.675135564984227e-06, "loss": 1.1791, "step": 3850 }, { "epoch": 0.81, "learning_rate": 3.6672672033167333e-06, "loss": 1.2524, "step": 3851 }, { "epoch": 0.81, "learning_rate": 3.6594064233797123e-06, "loss": 1.2261, "step": 3852 }, { "epoch": 0.81, "learning_rate": 3.6515532288221646e-06, "loss": 1.2731, "step": 3853 }, { "epoch": 0.81, "learning_rate": 3.643707623289592e-06, "loss": 1.2579, "step": 3854 }, { "epoch": 0.81, "learning_rate": 3.635869610423952e-06, "loss": 1.2219, "step": 3855 }, { "epoch": 0.81, "learning_rate": 3.628039193863695e-06, "loss": 1.2137, "step": 3856 }, { "epoch": 0.81, "learning_rate": 3.6202163772437326e-06, "loss": 1.2421, "step": 3857 }, { "epoch": 0.81, "learning_rate": 3.6124011641954473e-06, "loss": 1.272, "step": 3858 }, { "epoch": 0.81, "learning_rate": 3.6045935583467053e-06, "loss": 1.2398, "step": 3859 }, { "epoch": 0.81, "learning_rate": 3.5967935633218277e-06, "loss": 1.2371, "step": 3860 }, { "epoch": 0.81, "learning_rate": 3.589001182741616e-06, "loss": 1.2357, "step": 3861 }, { "epoch": 0.81, "learning_rate": 3.5812164202233236e-06, "loss": 1.1327, "step": 3862 }, { "epoch": 0.81, "learning_rate": 3.5734392793806704e-06, "loss": 1.1843, "step": 3863 }, { "epoch": 0.81, "learning_rate": 3.5656697638238447e-06, "loss": 1.2136, "step": 3864 }, { "epoch": 0.81, "learning_rate": 3.5579078771594988e-06, "loss": 1.2392, "step": 3865 }, { "epoch": 0.81, "learning_rate": 3.550153622990724e-06, "loss": 1.2362, "step": 3866 }, { "epoch": 0.81, "learning_rate": 3.542407004917092e-06, "loss": 1.1976, "step": 3867 }, { "epoch": 0.81, "learning_rate": 3.5346680265346113e-06, "loss": 1.2348, "step": 3868 }, { "epoch": 0.81, "learning_rate": 3.5269366914357585e-06, "loss": 1.2678, "step": 3869 }, { "epoch": 0.81, "learning_rate": 3.5192130032094517e-06, "loss": 1.256, "step": 3870 }, { "epoch": 0.81, "learning_rate": 3.511496965441057e-06, "loss": 1.1927, "step": 3871 }, { "epoch": 0.81, "learning_rate": 3.503788581712406e-06, "loss": 1.2388, "step": 3872 }, { "epoch": 0.81, "learning_rate": 3.4960878556017597e-06, "loss": 1.2157, "step": 3873 }, { "epoch": 0.81, "learning_rate": 3.488394790683829e-06, "loss": 1.238, "step": 3874 }, { "epoch": 0.82, "learning_rate": 3.480709390529777e-06, "loss": 1.2533, "step": 3875 }, { "epoch": 0.82, "learning_rate": 3.473031658707193e-06, "loss": 1.1969, "step": 3876 }, { "epoch": 0.82, "learning_rate": 3.465361598780128e-06, "loss": 1.2303, "step": 3877 }, { "epoch": 0.82, "learning_rate": 3.4576992143090517e-06, "loss": 1.2805, "step": 3878 }, { "epoch": 0.82, "learning_rate": 3.450044508850876e-06, "loss": 1.26, "step": 3879 }, { "epoch": 0.82, "learning_rate": 3.4423974859589594e-06, "loss": 1.1821, "step": 3880 }, { "epoch": 0.82, "learning_rate": 3.4347581491830796e-06, "loss": 1.2238, "step": 3881 }, { "epoch": 0.82, "learning_rate": 3.427126502069449e-06, "loss": 1.2715, "step": 3882 }, { "epoch": 0.82, "learning_rate": 3.4195025481607224e-06, "loss": 1.2255, "step": 3883 }, { "epoch": 0.82, "learning_rate": 3.411886290995965e-06, "loss": 1.2507, "step": 3884 }, { "epoch": 0.82, "learning_rate": 3.4042777341106903e-06, "loss": 1.2484, "step": 3885 }, { "epoch": 0.82, "learning_rate": 3.3966768810368132e-06, "loss": 1.2795, "step": 3886 }, { "epoch": 0.82, "learning_rate": 3.3890837353026964e-06, "loss": 1.2657, "step": 3887 }, { "epoch": 0.82, "learning_rate": 3.3814983004331014e-06, "loss": 1.2332, "step": 3888 }, { "epoch": 0.82, "learning_rate": 3.373920579949237e-06, "loss": 1.2309, "step": 3889 }, { "epoch": 0.82, "learning_rate": 3.3663505773687023e-06, "loss": 1.2391, "step": 3890 }, { "epoch": 0.82, "learning_rate": 3.3587882962055374e-06, "loss": 1.2462, "step": 3891 }, { "epoch": 0.82, "learning_rate": 3.3512337399701813e-06, "loss": 1.2391, "step": 3892 }, { "epoch": 0.82, "learning_rate": 3.3436869121695013e-06, "loss": 1.2377, "step": 3893 }, { "epoch": 0.82, "learning_rate": 3.3361478163067673e-06, "loss": 1.2021, "step": 3894 }, { "epoch": 0.82, "learning_rate": 3.328616455881657e-06, "loss": 1.2061, "step": 3895 }, { "epoch": 0.82, "learning_rate": 3.3210928343902716e-06, "loss": 1.2055, "step": 3896 }, { "epoch": 0.82, "learning_rate": 3.3135769553251017e-06, "loss": 1.2479, "step": 3897 }, { "epoch": 0.82, "learning_rate": 3.3060688221750637e-06, "loss": 1.2068, "step": 3898 }, { "epoch": 0.82, "learning_rate": 3.2985684384254648e-06, "loss": 1.2059, "step": 3899 }, { "epoch": 0.82, "learning_rate": 3.2910758075580085e-06, "loss": 1.2377, "step": 3900 }, { "epoch": 0.82, "learning_rate": 3.283590933050822e-06, "loss": 1.2323, "step": 3901 }, { "epoch": 0.82, "learning_rate": 3.2761138183784126e-06, "loss": 1.2227, "step": 3902 }, { "epoch": 0.82, "learning_rate": 3.2686444670116878e-06, "loss": 1.1798, "step": 3903 }, { "epoch": 0.82, "learning_rate": 3.261182882417966e-06, "loss": 1.2491, "step": 3904 }, { "epoch": 0.82, "learning_rate": 3.253729068060938e-06, "loss": 1.2497, "step": 3905 }, { "epoch": 0.82, "learning_rate": 3.2462830274007073e-06, "loss": 1.2374, "step": 3906 }, { "epoch": 0.82, "learning_rate": 3.2388447638937583e-06, "loss": 1.237, "step": 3907 }, { "epoch": 0.82, "learning_rate": 3.2314142809929617e-06, "loss": 1.2184, "step": 3908 }, { "epoch": 0.82, "learning_rate": 3.223991582147592e-06, "loss": 1.2358, "step": 3909 }, { "epoch": 0.82, "learning_rate": 3.216576670803291e-06, "loss": 1.1994, "step": 3910 }, { "epoch": 0.82, "learning_rate": 3.2091695504021047e-06, "loss": 1.2259, "step": 3911 }, { "epoch": 0.82, "learning_rate": 3.2017702243824434e-06, "loss": 1.2491, "step": 3912 }, { "epoch": 0.82, "learning_rate": 3.1943786961791166e-06, "loss": 1.25, "step": 3913 }, { "epoch": 0.82, "learning_rate": 3.1869949692232982e-06, "loss": 1.1986, "step": 3914 }, { "epoch": 0.82, "learning_rate": 3.179619046942557e-06, "loss": 1.2398, "step": 3915 }, { "epoch": 0.82, "learning_rate": 3.172250932760823e-06, "loss": 1.1818, "step": 3916 }, { "epoch": 0.82, "learning_rate": 3.164890630098416e-06, "loss": 1.2342, "step": 3917 }, { "epoch": 0.82, "learning_rate": 3.1575381423720142e-06, "loss": 1.2302, "step": 3918 }, { "epoch": 0.82, "learning_rate": 3.150193472994687e-06, "loss": 1.2234, "step": 3919 }, { "epoch": 0.82, "learning_rate": 3.142856625375856e-06, "loss": 1.2781, "step": 3920 }, { "epoch": 0.82, "learning_rate": 3.13552760292132e-06, "loss": 1.2061, "step": 3921 }, { "epoch": 0.82, "learning_rate": 3.1282064090332522e-06, "loss": 1.1835, "step": 3922 }, { "epoch": 0.83, "learning_rate": 3.1208930471101786e-06, "loss": 1.219, "step": 3923 }, { "epoch": 0.83, "learning_rate": 3.1135875205469946e-06, "loss": 1.2042, "step": 3924 }, { "epoch": 0.83, "learning_rate": 3.1062898327349656e-06, "loss": 1.2249, "step": 3925 }, { "epoch": 0.83, "learning_rate": 3.098999987061706e-06, "loss": 1.2185, "step": 3926 }, { "epoch": 0.83, "learning_rate": 3.0917179869112023e-06, "loss": 1.2449, "step": 3927 }, { "epoch": 0.83, "learning_rate": 3.084443835663791e-06, "loss": 1.1974, "step": 3928 }, { "epoch": 0.83, "learning_rate": 3.077177536696159e-06, "loss": 1.259, "step": 3929 }, { "epoch": 0.83, "learning_rate": 3.0699190933813683e-06, "loss": 1.2247, "step": 3930 }, { "epoch": 0.83, "learning_rate": 3.0626685090888177e-06, "loss": 1.2188, "step": 3931 }, { "epoch": 0.83, "learning_rate": 3.0554257871842543e-06, "loss": 1.2518, "step": 3932 }, { "epoch": 0.83, "learning_rate": 3.0481909310297954e-06, "loss": 1.2321, "step": 3933 }, { "epoch": 0.83, "learning_rate": 3.0409639439838833e-06, "loss": 1.2839, "step": 3934 }, { "epoch": 0.83, "learning_rate": 3.0337448294013307e-06, "loss": 1.2261, "step": 3935 }, { "epoch": 0.83, "learning_rate": 3.0265335906332717e-06, "loss": 1.1707, "step": 3936 }, { "epoch": 0.83, "learning_rate": 3.019330231027209e-06, "loss": 1.2104, "step": 3937 }, { "epoch": 0.83, "learning_rate": 3.012134753926965e-06, "loss": 1.2262, "step": 3938 }, { "epoch": 0.83, "learning_rate": 3.0049471626727246e-06, "loss": 1.2489, "step": 3939 }, { "epoch": 0.83, "learning_rate": 2.997767460600991e-06, "loss": 1.2649, "step": 3940 }, { "epoch": 0.83, "learning_rate": 2.990595651044621e-06, "loss": 1.1955, "step": 3941 }, { "epoch": 0.83, "learning_rate": 2.9834317373327983e-06, "loss": 1.2643, "step": 3942 }, { "epoch": 0.83, "learning_rate": 2.976275722791051e-06, "loss": 1.2107, "step": 3943 }, { "epoch": 0.83, "learning_rate": 2.9691276107412293e-06, "loss": 1.2201, "step": 3944 }, { "epoch": 0.83, "learning_rate": 2.961987404501516e-06, "loss": 1.2721, "step": 3945 }, { "epoch": 0.83, "learning_rate": 2.9548551073864386e-06, "loss": 1.2075, "step": 3946 }, { "epoch": 0.83, "learning_rate": 2.947730722706832e-06, "loss": 1.236, "step": 3947 }, { "epoch": 0.83, "learning_rate": 2.94061425376988e-06, "loss": 1.2498, "step": 3948 }, { "epoch": 0.83, "learning_rate": 2.9335057038790715e-06, "loss": 1.1947, "step": 3949 }, { "epoch": 0.83, "learning_rate": 2.9264050763342267e-06, "loss": 1.2651, "step": 3950 }, { "epoch": 0.83, "learning_rate": 2.9193123744315e-06, "loss": 1.2381, "step": 3951 }, { "epoch": 0.83, "learning_rate": 2.912227601463351e-06, "loss": 1.2728, "step": 3952 }, { "epoch": 0.83, "learning_rate": 2.9051507607185603e-06, "loss": 1.2692, "step": 3953 }, { "epoch": 0.83, "learning_rate": 2.8980818554822376e-06, "loss": 1.2416, "step": 3954 }, { "epoch": 0.83, "learning_rate": 2.8910208890357916e-06, "loss": 1.2354, "step": 3955 }, { "epoch": 0.83, "learning_rate": 2.883967864656969e-06, "loss": 1.2996, "step": 3956 }, { "epoch": 0.83, "learning_rate": 2.876922785619809e-06, "loss": 1.2389, "step": 3957 }, { "epoch": 0.83, "learning_rate": 2.8698856551946664e-06, "loss": 1.1833, "step": 3958 }, { "epoch": 0.83, "learning_rate": 2.8628564766482193e-06, "loss": 1.2702, "step": 3959 }, { "epoch": 0.83, "learning_rate": 2.855835253243433e-06, "loss": 1.2147, "step": 3960 }, { "epoch": 0.83, "learning_rate": 2.848821988239605e-06, "loss": 1.242, "step": 3961 }, { "epoch": 0.83, "learning_rate": 2.8418166848923158e-06, "loss": 1.1824, "step": 3962 }, { "epoch": 0.83, "learning_rate": 2.834819346453468e-06, "loss": 1.2445, "step": 3963 }, { "epoch": 0.83, "learning_rate": 2.827829976171248e-06, "loss": 1.2354, "step": 3964 }, { "epoch": 0.83, "learning_rate": 2.820848577290165e-06, "loss": 1.2483, "step": 3965 }, { "epoch": 0.83, "learning_rate": 2.8138751530510065e-06, "loss": 1.2257, "step": 3966 }, { "epoch": 0.83, "learning_rate": 2.806909706690881e-06, "loss": 1.2184, "step": 3967 }, { "epoch": 0.83, "learning_rate": 2.799952241443167e-06, "loss": 1.3169, "step": 3968 }, { "epoch": 0.83, "learning_rate": 2.7930027605375644e-06, "loss": 1.2348, "step": 3969 }, { "epoch": 0.84, "learning_rate": 2.7860612672000485e-06, "loss": 1.2304, "step": 3970 }, { "epoch": 0.84, "learning_rate": 2.7791277646528893e-06, "loss": 1.2483, "step": 3971 }, { "epoch": 0.84, "learning_rate": 2.77220225611466e-06, "loss": 1.2624, "step": 3972 }, { "epoch": 0.84, "learning_rate": 2.7652847448002074e-06, "loss": 1.224, "step": 3973 }, { "epoch": 0.84, "learning_rate": 2.7583752339206714e-06, "loss": 1.2417, "step": 3974 }, { "epoch": 0.84, "learning_rate": 2.7514737266834845e-06, "loss": 1.1722, "step": 3975 }, { "epoch": 0.84, "learning_rate": 2.7445802262923505e-06, "loss": 1.1919, "step": 3976 }, { "epoch": 0.84, "learning_rate": 2.737694735947276e-06, "loss": 1.1951, "step": 3977 }, { "epoch": 0.84, "learning_rate": 2.730817258844529e-06, "loss": 1.2739, "step": 3978 }, { "epoch": 0.84, "learning_rate": 2.723947798176665e-06, "loss": 1.1915, "step": 3979 }, { "epoch": 0.84, "learning_rate": 2.7170863571325257e-06, "loss": 1.2676, "step": 3980 }, { "epoch": 0.84, "learning_rate": 2.7102329388972215e-06, "loss": 1.2477, "step": 3981 }, { "epoch": 0.84, "learning_rate": 2.7033875466521363e-06, "loss": 1.2373, "step": 3982 }, { "epoch": 0.84, "learning_rate": 2.696550183574942e-06, "loss": 1.213, "step": 3983 }, { "epoch": 0.84, "learning_rate": 2.6897208528395656e-06, "loss": 1.1821, "step": 3984 }, { "epoch": 0.84, "learning_rate": 2.682899557616223e-06, "loss": 1.2195, "step": 3985 }, { "epoch": 0.84, "learning_rate": 2.676086301071381e-06, "loss": 1.2169, "step": 3986 }, { "epoch": 0.84, "learning_rate": 2.66928108636779e-06, "loss": 1.2245, "step": 3987 }, { "epoch": 0.84, "learning_rate": 2.662483916664467e-06, "loss": 1.2297, "step": 3988 }, { "epoch": 0.84, "learning_rate": 2.6556947951166836e-06, "loss": 1.1888, "step": 3989 }, { "epoch": 0.84, "learning_rate": 2.648913724875981e-06, "loss": 1.2278, "step": 3990 }, { "epoch": 0.84, "learning_rate": 2.6421407090901707e-06, "loss": 1.2259, "step": 3991 }, { "epoch": 0.84, "learning_rate": 2.635375750903306e-06, "loss": 1.2322, "step": 3992 }, { "epoch": 0.84, "learning_rate": 2.628618853455727e-06, "loss": 1.2886, "step": 3993 }, { "epoch": 0.84, "learning_rate": 2.621870019884005e-06, "loss": 1.2436, "step": 3994 }, { "epoch": 0.84, "learning_rate": 2.6151292533209826e-06, "loss": 1.2344, "step": 3995 }, { "epoch": 0.84, "learning_rate": 2.6083965568957603e-06, "loss": 1.2718, "step": 3996 }, { "epoch": 0.84, "learning_rate": 2.601671933733678e-06, "loss": 1.2734, "step": 3997 }, { "epoch": 0.84, "learning_rate": 2.594955386956346e-06, "loss": 1.2458, "step": 3998 }, { "epoch": 0.84, "learning_rate": 2.588246919681614e-06, "loss": 1.2649, "step": 3999 }, { "epoch": 0.84, "learning_rate": 2.5815465350235756e-06, "loss": 1.2268, "step": 4000 }, { "epoch": 0.84, "learning_rate": 2.5748542360925944e-06, "loss": 1.2762, "step": 4001 }, { "epoch": 0.84, "learning_rate": 2.568170025995258e-06, "loss": 1.2597, "step": 4002 }, { "epoch": 0.84, "learning_rate": 2.561493907834405e-06, "loss": 1.2471, "step": 4003 }, { "epoch": 0.84, "learning_rate": 2.5548258847091266e-06, "loss": 1.2999, "step": 4004 }, { "epoch": 0.84, "learning_rate": 2.548165959714748e-06, "loss": 1.1962, "step": 4005 }, { "epoch": 0.84, "learning_rate": 2.54151413594284e-06, "loss": 1.2221, "step": 4006 }, { "epoch": 0.84, "learning_rate": 2.534870416481208e-06, "loss": 1.2091, "step": 4007 }, { "epoch": 0.84, "learning_rate": 2.5282348044138915e-06, "loss": 1.1968, "step": 4008 }, { "epoch": 0.84, "learning_rate": 2.521607302821183e-06, "loss": 1.2388, "step": 4009 }, { "epoch": 0.84, "learning_rate": 2.514987914779592e-06, "loss": 1.1939, "step": 4010 }, { "epoch": 0.84, "learning_rate": 2.5083766433618695e-06, "loss": 1.2559, "step": 4011 }, { "epoch": 0.84, "learning_rate": 2.5017734916370073e-06, "loss": 1.2481, "step": 4012 }, { "epoch": 0.84, "learning_rate": 2.495178462670207e-06, "loss": 1.2342, "step": 4013 }, { "epoch": 0.84, "learning_rate": 2.4885915595229215e-06, "loss": 1.2446, "step": 4014 }, { "epoch": 0.84, "learning_rate": 2.4820127852528163e-06, "loss": 1.2516, "step": 4015 }, { "epoch": 0.84, "learning_rate": 2.4754421429137887e-06, "loss": 1.138, "step": 4016 }, { "epoch": 0.84, "learning_rate": 2.468879635555965e-06, "loss": 1.231, "step": 4017 }, { "epoch": 0.85, "learning_rate": 2.462325266225687e-06, "loss": 1.1968, "step": 4018 }, { "epoch": 0.85, "learning_rate": 2.455779037965529e-06, "loss": 1.2174, "step": 4019 }, { "epoch": 0.85, "learning_rate": 2.4492409538142803e-06, "loss": 1.263, "step": 4020 }, { "epoch": 0.85, "learning_rate": 2.44271101680694e-06, "loss": 1.2434, "step": 4021 }, { "epoch": 0.85, "learning_rate": 2.436189229974748e-06, "loss": 1.2122, "step": 4022 }, { "epoch": 0.85, "learning_rate": 2.4296755963451424e-06, "loss": 1.2643, "step": 4023 }, { "epoch": 0.85, "learning_rate": 2.423170118941778e-06, "loss": 1.2076, "step": 4024 }, { "epoch": 0.85, "learning_rate": 2.4166728007845364e-06, "loss": 1.212, "step": 4025 }, { "epoch": 0.85, "learning_rate": 2.4101836448894924e-06, "loss": 1.2976, "step": 4026 }, { "epoch": 0.85, "learning_rate": 2.4037026542689555e-06, "loss": 1.2679, "step": 4027 }, { "epoch": 0.85, "learning_rate": 2.3972298319314224e-06, "loss": 1.2742, "step": 4028 }, { "epoch": 0.85, "learning_rate": 2.3907651808816067e-06, "loss": 1.2112, "step": 4029 }, { "epoch": 0.85, "learning_rate": 2.384308704120435e-06, "loss": 1.2603, "step": 4030 }, { "epoch": 0.85, "learning_rate": 2.3778604046450313e-06, "loss": 1.2175, "step": 4031 }, { "epoch": 0.85, "learning_rate": 2.371420285448722e-06, "loss": 1.1484, "step": 4032 }, { "epoch": 0.85, "learning_rate": 2.364988349521049e-06, "loss": 1.2499, "step": 4033 }, { "epoch": 0.85, "learning_rate": 2.358564599847737e-06, "loss": 1.2726, "step": 4034 }, { "epoch": 0.85, "learning_rate": 2.352149039410727e-06, "loss": 1.207, "step": 4035 }, { "epoch": 0.85, "learning_rate": 2.345741671188153e-06, "loss": 1.231, "step": 4036 }, { "epoch": 0.85, "learning_rate": 2.33934249815434e-06, "loss": 1.2326, "step": 4037 }, { "epoch": 0.85, "learning_rate": 2.3329515232798207e-06, "loss": 1.259, "step": 4038 }, { "epoch": 0.85, "learning_rate": 2.3265687495313106e-06, "loss": 1.2424, "step": 4039 }, { "epoch": 0.85, "learning_rate": 2.3201941798717176e-06, "loss": 1.2398, "step": 4040 }, { "epoch": 0.85, "learning_rate": 2.313827817260159e-06, "loss": 1.2136, "step": 4041 }, { "epoch": 0.85, "learning_rate": 2.307469664651918e-06, "loss": 1.1884, "step": 4042 }, { "epoch": 0.85, "learning_rate": 2.3011197249984886e-06, "loss": 1.2505, "step": 4043 }, { "epoch": 0.85, "learning_rate": 2.2947780012475396e-06, "loss": 1.2538, "step": 4044 }, { "epoch": 0.85, "learning_rate": 2.2884444963429188e-06, "loss": 1.1803, "step": 4045 }, { "epoch": 0.85, "learning_rate": 2.282119213224683e-06, "loss": 1.1689, "step": 4046 }, { "epoch": 0.85, "learning_rate": 2.2758021548290478e-06, "loss": 1.2234, "step": 4047 }, { "epoch": 0.85, "learning_rate": 2.2694933240884277e-06, "loss": 1.2584, "step": 4048 }, { "epoch": 0.85, "learning_rate": 2.263192723931409e-06, "loss": 1.1257, "step": 4049 }, { "epoch": 0.85, "learning_rate": 2.2569003572827543e-06, "loss": 1.2695, "step": 4050 }, { "epoch": 0.85, "learning_rate": 2.250616227063418e-06, "loss": 1.2412, "step": 4051 }, { "epoch": 0.85, "learning_rate": 2.244340336190518e-06, "loss": 1.2503, "step": 4052 }, { "epoch": 0.85, "learning_rate": 2.2380726875773507e-06, "loss": 1.2265, "step": 4053 }, { "epoch": 0.85, "learning_rate": 2.2318132841333906e-06, "loss": 1.2291, "step": 4054 }, { "epoch": 0.85, "learning_rate": 2.2255621287642805e-06, "loss": 1.2869, "step": 4055 }, { "epoch": 0.85, "learning_rate": 2.2193192243718385e-06, "loss": 1.2072, "step": 4056 }, { "epoch": 0.85, "learning_rate": 2.2130845738540475e-06, "loss": 1.2413, "step": 4057 }, { "epoch": 0.85, "learning_rate": 2.2068581801050557e-06, "loss": 1.2431, "step": 4058 }, { "epoch": 0.85, "learning_rate": 2.2006400460151923e-06, "loss": 1.2413, "step": 4059 }, { "epoch": 0.85, "learning_rate": 2.1944301744709428e-06, "loss": 1.2783, "step": 4060 }, { "epoch": 0.85, "learning_rate": 2.1882285683549555e-06, "loss": 1.1931, "step": 4061 }, { "epoch": 0.85, "learning_rate": 2.1820352305460492e-06, "loss": 1.2397, "step": 4062 }, { "epoch": 0.85, "learning_rate": 2.1758501639191908e-06, "loss": 1.1991, "step": 4063 }, { "epoch": 0.85, "learning_rate": 2.169673371345531e-06, "loss": 1.2471, "step": 4064 }, { "epoch": 0.86, "learning_rate": 2.1635048556923555e-06, "loss": 1.2106, "step": 4065 }, { "epoch": 0.86, "learning_rate": 2.1573446198231185e-06, "loss": 1.1624, "step": 4066 }, { "epoch": 0.86, "learning_rate": 2.1511926665974324e-06, "loss": 1.2528, "step": 4067 }, { "epoch": 0.86, "learning_rate": 2.1450489988710644e-06, "loss": 1.1853, "step": 4068 }, { "epoch": 0.86, "learning_rate": 2.138913619495928e-06, "loss": 1.1842, "step": 4069 }, { "epoch": 0.86, "learning_rate": 2.1327865313201015e-06, "loss": 1.2136, "step": 4070 }, { "epoch": 0.86, "learning_rate": 2.1266677371877996e-06, "loss": 1.1817, "step": 4071 }, { "epoch": 0.86, "learning_rate": 2.120557239939405e-06, "loss": 1.1964, "step": 4072 }, { "epoch": 0.86, "learning_rate": 2.114455042411432e-06, "loss": 1.2304, "step": 4073 }, { "epoch": 0.86, "learning_rate": 2.108361147436546e-06, "loss": 1.2244, "step": 4074 }, { "epoch": 0.86, "learning_rate": 2.1022755578435715e-06, "loss": 1.2338, "step": 4075 }, { "epoch": 0.86, "learning_rate": 2.0961982764574597e-06, "loss": 1.1968, "step": 4076 }, { "epoch": 0.86, "learning_rate": 2.0901293060993154e-06, "loss": 1.2226, "step": 4077 }, { "epoch": 0.86, "learning_rate": 2.0840686495863837e-06, "loss": 1.2201, "step": 4078 }, { "epoch": 0.86, "learning_rate": 2.078016309732047e-06, "loss": 1.2608, "step": 4079 }, { "epoch": 0.86, "learning_rate": 2.0719722893458317e-06, "loss": 1.2443, "step": 4080 }, { "epoch": 0.86, "learning_rate": 2.0659365912333972e-06, "loss": 1.202, "step": 4081 }, { "epoch": 0.86, "learning_rate": 2.0599092181965474e-06, "loss": 1.1398, "step": 4082 }, { "epoch": 0.86, "learning_rate": 2.0538901730332128e-06, "loss": 1.2773, "step": 4083 }, { "epoch": 0.86, "learning_rate": 2.047879458537465e-06, "loss": 1.2794, "step": 4084 }, { "epoch": 0.86, "learning_rate": 2.0418770774995034e-06, "loss": 1.2047, "step": 4085 }, { "epoch": 0.86, "learning_rate": 2.0358830327056633e-06, "loss": 1.2397, "step": 4086 }, { "epoch": 0.86, "learning_rate": 2.0298973269384037e-06, "loss": 1.2516, "step": 4087 }, { "epoch": 0.86, "learning_rate": 2.023919962976324e-06, "loss": 1.2235, "step": 4088 }, { "epoch": 0.86, "learning_rate": 2.0179509435941403e-06, "loss": 1.1876, "step": 4089 }, { "epoch": 0.86, "learning_rate": 2.011990271562696e-06, "loss": 1.2351, "step": 4090 }, { "epoch": 0.86, "learning_rate": 2.006037949648971e-06, "loss": 1.2192, "step": 4091 }, { "epoch": 0.86, "learning_rate": 2.000093980616051e-06, "loss": 1.2409, "step": 4092 }, { "epoch": 0.86, "learning_rate": 1.9941583672231624e-06, "loss": 1.2603, "step": 4093 }, { "epoch": 0.86, "learning_rate": 1.9882311122256425e-06, "loss": 1.2002, "step": 4094 }, { "epoch": 0.86, "learning_rate": 1.9823122183749443e-06, "loss": 1.2564, "step": 4095 }, { "epoch": 0.86, "learning_rate": 1.9764016884186545e-06, "loss": 1.1684, "step": 4096 }, { "epoch": 0.86, "learning_rate": 1.9704995251004622e-06, "loss": 1.2472, "step": 4097 }, { "epoch": 0.86, "learning_rate": 1.9646057311601853e-06, "loss": 1.2206, "step": 4098 }, { "epoch": 0.86, "learning_rate": 1.958720309333746e-06, "loss": 1.1955, "step": 4099 }, { "epoch": 0.86, "learning_rate": 1.952843262353181e-06, "loss": 1.1952, "step": 4100 }, { "epoch": 0.86, "learning_rate": 1.946974592946651e-06, "loss": 1.212, "step": 4101 }, { "epoch": 0.86, "learning_rate": 1.9411143038384163e-06, "loss": 1.1871, "step": 4102 }, { "epoch": 0.86, "learning_rate": 1.935262397748845e-06, "loss": 1.2422, "step": 4103 }, { "epoch": 0.86, "learning_rate": 1.929418877394429e-06, "loss": 1.1622, "step": 4104 }, { "epoch": 0.86, "learning_rate": 1.923583745487747e-06, "loss": 1.2082, "step": 4105 }, { "epoch": 0.86, "learning_rate": 1.917757004737506e-06, "loss": 1.2459, "step": 4106 }, { "epoch": 0.86, "learning_rate": 1.9119386578484934e-06, "loss": 1.2342, "step": 4107 }, { "epoch": 0.86, "learning_rate": 1.906128707521624e-06, "loss": 1.2289, "step": 4108 }, { "epoch": 0.86, "learning_rate": 1.900327156453896e-06, "loss": 1.2199, "step": 4109 }, { "epoch": 0.86, "learning_rate": 1.894534007338422e-06, "loss": 1.221, "step": 4110 }, { "epoch": 0.86, "learning_rate": 1.8887492628644022e-06, "loss": 1.1752, "step": 4111 }, { "epoch": 0.86, "learning_rate": 1.8829729257171503e-06, "loss": 1.2397, "step": 4112 }, { "epoch": 0.87, "learning_rate": 1.8772049985780616e-06, "loss": 1.216, "step": 4113 }, { "epoch": 0.87, "learning_rate": 1.871445484124641e-06, "loss": 1.2169, "step": 4114 }, { "epoch": 0.87, "learning_rate": 1.8656943850304765e-06, "loss": 1.2489, "step": 4115 }, { "epoch": 0.87, "learning_rate": 1.8599517039652548e-06, "loss": 1.2228, "step": 4116 }, { "epoch": 0.87, "learning_rate": 1.8542174435947614e-06, "loss": 1.2122, "step": 4117 }, { "epoch": 0.87, "learning_rate": 1.8484916065808622e-06, "loss": 1.2304, "step": 4118 }, { "epoch": 0.87, "learning_rate": 1.8427741955815138e-06, "loss": 1.2397, "step": 4119 }, { "epoch": 0.87, "learning_rate": 1.8370652132507705e-06, "loss": 1.1906, "step": 4120 }, { "epoch": 0.87, "learning_rate": 1.8313646622387639e-06, "loss": 1.2633, "step": 4121 }, { "epoch": 0.87, "learning_rate": 1.8256725451917233e-06, "loss": 1.2508, "step": 4122 }, { "epoch": 0.87, "learning_rate": 1.8199888647519537e-06, "loss": 1.1996, "step": 4123 }, { "epoch": 0.87, "learning_rate": 1.8143136235578374e-06, "loss": 1.2186, "step": 4124 }, { "epoch": 0.87, "learning_rate": 1.8086468242438582e-06, "loss": 1.1936, "step": 4125 }, { "epoch": 0.87, "learning_rate": 1.8029884694405631e-06, "loss": 1.3133, "step": 4126 }, { "epoch": 0.87, "learning_rate": 1.7973385617745953e-06, "loss": 1.1846, "step": 4127 }, { "epoch": 0.87, "learning_rate": 1.7916971038686614e-06, "loss": 1.2062, "step": 4128 }, { "epoch": 0.87, "learning_rate": 1.7860640983415533e-06, "loss": 1.2634, "step": 4129 }, { "epoch": 0.87, "learning_rate": 1.7804395478081416e-06, "loss": 1.2399, "step": 4130 }, { "epoch": 0.87, "learning_rate": 1.774823454879362e-06, "loss": 1.2037, "step": 4131 }, { "epoch": 0.87, "learning_rate": 1.7692158221622379e-06, "loss": 1.1931, "step": 4132 }, { "epoch": 0.87, "learning_rate": 1.763616652259854e-06, "loss": 1.2231, "step": 4133 }, { "epoch": 0.87, "learning_rate": 1.758025947771378e-06, "loss": 1.2224, "step": 4134 }, { "epoch": 0.87, "learning_rate": 1.752443711292029e-06, "loss": 1.1991, "step": 4135 }, { "epoch": 0.87, "learning_rate": 1.7468699454131211e-06, "loss": 1.2411, "step": 4136 }, { "epoch": 0.87, "learning_rate": 1.741304652722009e-06, "loss": 1.2054, "step": 4137 }, { "epoch": 0.87, "learning_rate": 1.7357478358021374e-06, "loss": 1.2458, "step": 4138 }, { "epoch": 0.87, "learning_rate": 1.7301994972330028e-06, "loss": 1.2315, "step": 4139 }, { "epoch": 0.87, "learning_rate": 1.724659639590167e-06, "loss": 1.2182, "step": 4140 }, { "epoch": 0.87, "learning_rate": 1.7191282654452646e-06, "loss": 1.1663, "step": 4141 }, { "epoch": 0.87, "learning_rate": 1.7136053773659766e-06, "loss": 1.2193, "step": 4142 }, { "epoch": 0.87, "learning_rate": 1.7080909779160615e-06, "loss": 1.269, "step": 4143 }, { "epoch": 0.87, "learning_rate": 1.7025850696553248e-06, "loss": 1.2466, "step": 4144 }, { "epoch": 0.87, "learning_rate": 1.6970876551396309e-06, "loss": 1.2437, "step": 4145 }, { "epoch": 0.87, "learning_rate": 1.6915987369209142e-06, "loss": 1.2028, "step": 4146 }, { "epoch": 0.87, "learning_rate": 1.6861183175471495e-06, "loss": 1.2666, "step": 4147 }, { "epoch": 0.87, "learning_rate": 1.6806463995623735e-06, "loss": 1.2602, "step": 4148 }, { "epoch": 0.87, "learning_rate": 1.6751829855066804e-06, "loss": 1.1981, "step": 4149 }, { "epoch": 0.87, "learning_rate": 1.669728077916206e-06, "loss": 1.2195, "step": 4150 }, { "epoch": 0.87, "learning_rate": 1.6642816793231499e-06, "loss": 1.2094, "step": 4151 }, { "epoch": 0.87, "learning_rate": 1.6588437922557533e-06, "loss": 1.2068, "step": 4152 }, { "epoch": 0.87, "learning_rate": 1.6534144192383038e-06, "loss": 1.2506, "step": 4153 }, { "epoch": 0.87, "learning_rate": 1.6479935627911481e-06, "loss": 1.2311, "step": 4154 }, { "epoch": 0.87, "learning_rate": 1.6425812254306707e-06, "loss": 1.2384, "step": 4155 }, { "epoch": 0.87, "learning_rate": 1.637177409669304e-06, "loss": 1.2, "step": 4156 }, { "epoch": 0.87, "learning_rate": 1.6317821180155214e-06, "loss": 1.2337, "step": 4157 }, { "epoch": 0.87, "learning_rate": 1.6263953529738464e-06, "loss": 1.2668, "step": 4158 }, { "epoch": 0.87, "learning_rate": 1.621017117044843e-06, "loss": 1.2193, "step": 4159 }, { "epoch": 0.88, "learning_rate": 1.6156474127251077e-06, "loss": 1.2489, "step": 4160 }, { "epoch": 0.88, "learning_rate": 1.6102862425072818e-06, "loss": 1.2418, "step": 4161 }, { "epoch": 0.88, "learning_rate": 1.6049336088800505e-06, "loss": 1.215, "step": 4162 }, { "epoch": 0.88, "learning_rate": 1.5995895143281236e-06, "loss": 1.2294, "step": 4163 }, { "epoch": 0.88, "learning_rate": 1.5942539613322638e-06, "loss": 1.1854, "step": 4164 }, { "epoch": 0.88, "learning_rate": 1.5889269523692541e-06, "loss": 1.205, "step": 4165 }, { "epoch": 0.88, "learning_rate": 1.5836084899119165e-06, "loss": 1.1918, "step": 4166 }, { "epoch": 0.88, "learning_rate": 1.5782985764291091e-06, "loss": 1.2247, "step": 4167 }, { "epoch": 0.88, "learning_rate": 1.5729972143857164e-06, "loss": 1.2258, "step": 4168 }, { "epoch": 0.88, "learning_rate": 1.567704406242654e-06, "loss": 1.1779, "step": 4169 }, { "epoch": 0.88, "learning_rate": 1.5624201544568717e-06, "loss": 1.2531, "step": 4170 }, { "epoch": 0.88, "learning_rate": 1.557144461481337e-06, "loss": 1.1995, "step": 4171 }, { "epoch": 0.88, "learning_rate": 1.5518773297650613e-06, "loss": 1.2137, "step": 4172 }, { "epoch": 0.88, "learning_rate": 1.5466187617530647e-06, "loss": 1.2479, "step": 4173 }, { "epoch": 0.88, "learning_rate": 1.541368759886397e-06, "loss": 1.0979, "step": 4174 }, { "epoch": 0.88, "learning_rate": 1.5361273266021392e-06, "loss": 1.2153, "step": 4175 }, { "epoch": 0.88, "learning_rate": 1.5308944643333857e-06, "loss": 1.2625, "step": 4176 }, { "epoch": 0.88, "learning_rate": 1.5256701755092574e-06, "loss": 1.2282, "step": 4177 }, { "epoch": 0.88, "learning_rate": 1.5204544625548922e-06, "loss": 1.2628, "step": 4178 }, { "epoch": 0.88, "learning_rate": 1.5152473278914447e-06, "loss": 1.2377, "step": 4179 }, { "epoch": 0.88, "learning_rate": 1.5100487739360993e-06, "loss": 1.2581, "step": 4180 }, { "epoch": 0.88, "learning_rate": 1.5048588031020405e-06, "loss": 1.228, "step": 4181 }, { "epoch": 0.88, "learning_rate": 1.4996774177984818e-06, "loss": 1.2054, "step": 4182 }, { "epoch": 0.88, "learning_rate": 1.494504620430648e-06, "loss": 1.25, "step": 4183 }, { "epoch": 0.88, "learning_rate": 1.489340413399769e-06, "loss": 1.2274, "step": 4184 }, { "epoch": 0.88, "learning_rate": 1.484184799103101e-06, "loss": 1.2219, "step": 4185 }, { "epoch": 0.88, "learning_rate": 1.4790377799339007e-06, "loss": 1.2518, "step": 4186 }, { "epoch": 0.88, "learning_rate": 1.4738993582814343e-06, "loss": 1.2306, "step": 4187 }, { "epoch": 0.88, "learning_rate": 1.4687695365309895e-06, "loss": 1.2787, "step": 4188 }, { "epoch": 0.88, "learning_rate": 1.4636483170638505e-06, "loss": 1.2192, "step": 4189 }, { "epoch": 0.88, "learning_rate": 1.4585357022573043e-06, "loss": 1.2671, "step": 4190 }, { "epoch": 0.88, "learning_rate": 1.4534316944846595e-06, "loss": 1.2599, "step": 4191 }, { "epoch": 0.88, "learning_rate": 1.4483362961152114e-06, "loss": 1.1853, "step": 4192 }, { "epoch": 0.88, "learning_rate": 1.4432495095142796e-06, "loss": 1.2101, "step": 4193 }, { "epoch": 0.88, "learning_rate": 1.438171337043164e-06, "loss": 1.2596, "step": 4194 }, { "epoch": 0.88, "learning_rate": 1.4331017810591764e-06, "loss": 1.2582, "step": 4195 }, { "epoch": 0.88, "learning_rate": 1.4280408439156369e-06, "loss": 1.2308, "step": 4196 }, { "epoch": 0.88, "learning_rate": 1.4229885279618461e-06, "loss": 1.2049, "step": 4197 }, { "epoch": 0.88, "learning_rate": 1.4179448355431168e-06, "loss": 1.2557, "step": 4198 }, { "epoch": 0.88, "learning_rate": 1.4129097690007543e-06, "loss": 1.2506, "step": 4199 }, { "epoch": 0.88, "learning_rate": 1.4078833306720573e-06, "loss": 1.216, "step": 4200 }, { "epoch": 0.88, "learning_rate": 1.4028655228903286e-06, "loss": 1.2186, "step": 4201 }, { "epoch": 0.88, "learning_rate": 1.3978563479848538e-06, "loss": 1.2294, "step": 4202 }, { "epoch": 0.88, "learning_rate": 1.3928558082809107e-06, "loss": 1.2112, "step": 4203 }, { "epoch": 0.88, "learning_rate": 1.3878639060997822e-06, "loss": 1.2523, "step": 4204 }, { "epoch": 0.88, "learning_rate": 1.3828806437587216e-06, "loss": 1.3306, "step": 4205 }, { "epoch": 0.88, "learning_rate": 1.3779060235709918e-06, "loss": 1.2433, "step": 4206 }, { "epoch": 0.88, "learning_rate": 1.3729400478458322e-06, "loss": 1.1959, "step": 4207 }, { "epoch": 0.89, "learning_rate": 1.3679827188884675e-06, "loss": 1.2551, "step": 4208 }, { "epoch": 0.89, "learning_rate": 1.3630340390001195e-06, "loss": 1.1318, "step": 4209 }, { "epoch": 0.89, "learning_rate": 1.358094010477986e-06, "loss": 1.2404, "step": 4210 }, { "epoch": 0.89, "learning_rate": 1.353162635615246e-06, "loss": 1.2242, "step": 4211 }, { "epoch": 0.89, "learning_rate": 1.3482399167010752e-06, "loss": 1.2531, "step": 4212 }, { "epoch": 0.89, "learning_rate": 1.3433258560206165e-06, "loss": 1.2312, "step": 4213 }, { "epoch": 0.89, "learning_rate": 1.3384204558550028e-06, "loss": 1.2286, "step": 4214 }, { "epoch": 0.89, "learning_rate": 1.333523718481342e-06, "loss": 1.1752, "step": 4215 }, { "epoch": 0.89, "learning_rate": 1.3286356461727202e-06, "loss": 1.1919, "step": 4216 }, { "epoch": 0.89, "learning_rate": 1.3237562411982086e-06, "loss": 1.2456, "step": 4217 }, { "epoch": 0.89, "learning_rate": 1.3188855058228468e-06, "loss": 1.2068, "step": 4218 }, { "epoch": 0.89, "learning_rate": 1.3140234423076504e-06, "loss": 1.2077, "step": 4219 }, { "epoch": 0.89, "learning_rate": 1.3091700529096186e-06, "loss": 1.2678, "step": 4220 }, { "epoch": 0.89, "learning_rate": 1.304325339881709e-06, "loss": 1.2467, "step": 4221 }, { "epoch": 0.89, "learning_rate": 1.2994893054728653e-06, "loss": 1.2708, "step": 4222 }, { "epoch": 0.89, "learning_rate": 1.2946619519279979e-06, "loss": 1.2972, "step": 4223 }, { "epoch": 0.89, "learning_rate": 1.2898432814879813e-06, "loss": 1.2105, "step": 4224 }, { "epoch": 0.89, "learning_rate": 1.2850332963896706e-06, "loss": 1.2809, "step": 4225 }, { "epoch": 0.89, "learning_rate": 1.2802319988658818e-06, "loss": 1.2574, "step": 4226 }, { "epoch": 0.89, "learning_rate": 1.2754393911453944e-06, "loss": 1.2115, "step": 4227 }, { "epoch": 0.89, "learning_rate": 1.2706554754529665e-06, "loss": 1.2213, "step": 4228 }, { "epoch": 0.89, "learning_rate": 1.2658802540093084e-06, "loss": 1.2547, "step": 4229 }, { "epoch": 0.89, "learning_rate": 1.2611137290311003e-06, "loss": 1.2278, "step": 4230 }, { "epoch": 0.89, "learning_rate": 1.2563559027309925e-06, "loss": 1.1846, "step": 4231 }, { "epoch": 0.89, "learning_rate": 1.2516067773175822e-06, "loss": 1.2716, "step": 4232 }, { "epoch": 0.89, "learning_rate": 1.2468663549954397e-06, "loss": 1.2317, "step": 4233 }, { "epoch": 0.89, "learning_rate": 1.2421346379650868e-06, "loss": 1.2159, "step": 4234 }, { "epoch": 0.89, "learning_rate": 1.2374116284230153e-06, "loss": 1.2525, "step": 4235 }, { "epoch": 0.89, "learning_rate": 1.2326973285616628e-06, "loss": 1.2559, "step": 4236 }, { "epoch": 0.89, "learning_rate": 1.2279917405694298e-06, "loss": 1.239, "step": 4237 }, { "epoch": 0.89, "learning_rate": 1.2232948666306732e-06, "loss": 1.2, "step": 4238 }, { "epoch": 0.89, "learning_rate": 1.218606708925707e-06, "loss": 1.2679, "step": 4239 }, { "epoch": 0.89, "learning_rate": 1.2139272696307857e-06, "loss": 1.2491, "step": 4240 }, { "epoch": 0.89, "learning_rate": 1.2092565509181386e-06, "loss": 1.2268, "step": 4241 }, { "epoch": 0.89, "learning_rate": 1.2045945549559269e-06, "loss": 1.2602, "step": 4242 }, { "epoch": 0.89, "learning_rate": 1.1999412839082748e-06, "loss": 1.1739, "step": 4243 }, { "epoch": 0.89, "learning_rate": 1.1952967399352522e-06, "loss": 1.232, "step": 4244 }, { "epoch": 0.89, "learning_rate": 1.1906609251928746e-06, "loss": 1.2015, "step": 4245 }, { "epoch": 0.89, "learning_rate": 1.186033841833112e-06, "loss": 1.2532, "step": 4246 }, { "epoch": 0.89, "learning_rate": 1.1814154920038789e-06, "loss": 1.2721, "step": 4247 }, { "epoch": 0.89, "learning_rate": 1.17680587784903e-06, "loss": 1.1803, "step": 4248 }, { "epoch": 0.89, "learning_rate": 1.1722050015083752e-06, "loss": 1.2556, "step": 4249 }, { "epoch": 0.89, "learning_rate": 1.1676128651176578e-06, "loss": 1.1844, "step": 4250 }, { "epoch": 0.89, "learning_rate": 1.163029470808572e-06, "loss": 1.2093, "step": 4251 }, { "epoch": 0.89, "learning_rate": 1.1584548207087498e-06, "loss": 1.2231, "step": 4252 }, { "epoch": 0.89, "learning_rate": 1.1538889169417654e-06, "loss": 1.2151, "step": 4253 }, { "epoch": 0.89, "learning_rate": 1.1493317616271327e-06, "loss": 1.2287, "step": 4254 }, { "epoch": 0.89, "learning_rate": 1.1447833568803036e-06, "loss": 1.2537, "step": 4255 }, { "epoch": 0.9, "learning_rate": 1.140243704812667e-06, "loss": 1.1861, "step": 4256 }, { "epoch": 0.9, "learning_rate": 1.1357128075315572e-06, "loss": 1.221, "step": 4257 }, { "epoch": 0.9, "learning_rate": 1.1311906671402274e-06, "loss": 1.2518, "step": 4258 }, { "epoch": 0.9, "learning_rate": 1.126677285737887e-06, "loss": 1.2049, "step": 4259 }, { "epoch": 0.9, "learning_rate": 1.122172665419663e-06, "loss": 1.2403, "step": 4260 }, { "epoch": 0.9, "learning_rate": 1.1176768082766177e-06, "loss": 1.1923, "step": 4261 }, { "epoch": 0.9, "learning_rate": 1.1131897163957573e-06, "loss": 1.1651, "step": 4262 }, { "epoch": 0.9, "learning_rate": 1.1087113918600023e-06, "loss": 1.1937, "step": 4263 }, { "epoch": 0.9, "learning_rate": 1.1042418367482188e-06, "loss": 1.3132, "step": 4264 }, { "epoch": 0.9, "learning_rate": 1.0997810531351916e-06, "loss": 1.2421, "step": 4265 }, { "epoch": 0.9, "learning_rate": 1.0953290430916353e-06, "loss": 1.2078, "step": 4266 }, { "epoch": 0.9, "learning_rate": 1.0908858086841989e-06, "loss": 1.2188, "step": 4267 }, { "epoch": 0.9, "learning_rate": 1.0864513519754484e-06, "loss": 1.2179, "step": 4268 }, { "epoch": 0.9, "learning_rate": 1.082025675023879e-06, "loss": 1.2269, "step": 4269 }, { "epoch": 0.9, "learning_rate": 1.077608779883912e-06, "loss": 1.2637, "step": 4270 }, { "epoch": 0.9, "learning_rate": 1.0732006686058893e-06, "loss": 1.241, "step": 4271 }, { "epoch": 0.9, "learning_rate": 1.0688013432360811e-06, "loss": 1.2497, "step": 4272 }, { "epoch": 0.9, "learning_rate": 1.0644108058166692e-06, "loss": 1.2513, "step": 4273 }, { "epoch": 0.9, "learning_rate": 1.0600290583857631e-06, "loss": 1.2318, "step": 4274 }, { "epoch": 0.9, "learning_rate": 1.0556561029773914e-06, "loss": 1.1844, "step": 4275 }, { "epoch": 0.9, "learning_rate": 1.0512919416214995e-06, "loss": 1.2313, "step": 4276 }, { "epoch": 0.9, "learning_rate": 1.0469365763439532e-06, "loss": 1.207, "step": 4277 }, { "epoch": 0.9, "learning_rate": 1.0425900091665286e-06, "loss": 1.291, "step": 4278 }, { "epoch": 0.9, "learning_rate": 1.0382522421069274e-06, "loss": 1.2355, "step": 4279 }, { "epoch": 0.9, "learning_rate": 1.033923277178759e-06, "loss": 1.1848, "step": 4280 }, { "epoch": 0.9, "learning_rate": 1.029603116391551e-06, "loss": 1.2407, "step": 4281 }, { "epoch": 0.9, "learning_rate": 1.0252917617507374e-06, "loss": 1.2639, "step": 4282 }, { "epoch": 0.9, "learning_rate": 1.020989215257675e-06, "loss": 1.2355, "step": 4283 }, { "epoch": 0.9, "learning_rate": 1.0166954789096194e-06, "loss": 1.1912, "step": 4284 }, { "epoch": 0.9, "learning_rate": 1.0124105546997521e-06, "loss": 1.2359, "step": 4285 }, { "epoch": 0.9, "learning_rate": 1.008134444617146e-06, "loss": 1.2382, "step": 4286 }, { "epoch": 0.9, "learning_rate": 1.0038671506467934e-06, "loss": 1.2216, "step": 4287 }, { "epoch": 0.9, "learning_rate": 9.996086747695966e-07, "loss": 1.2366, "step": 4288 }, { "epoch": 0.9, "learning_rate": 9.953590189623563e-07, "loss": 1.2396, "step": 4289 }, { "epoch": 0.9, "learning_rate": 9.911181851977792e-07, "loss": 1.202, "step": 4290 }, { "epoch": 0.9, "learning_rate": 9.868861754444858e-07, "loss": 1.2017, "step": 4291 }, { "epoch": 0.9, "learning_rate": 9.826629916669917e-07, "loss": 1.2344, "step": 4292 }, { "epoch": 0.9, "learning_rate": 9.784486358257194e-07, "loss": 1.2402, "step": 4293 }, { "epoch": 0.9, "learning_rate": 9.742431098769933e-07, "loss": 1.2851, "step": 4294 }, { "epoch": 0.9, "learning_rate": 9.700464157730338e-07, "loss": 1.1857, "step": 4295 }, { "epoch": 0.9, "learning_rate": 9.658585554619737e-07, "loss": 1.2451, "step": 4296 }, { "epoch": 0.9, "learning_rate": 9.616795308878313e-07, "loss": 1.2263, "step": 4297 }, { "epoch": 0.9, "learning_rate": 9.575093439905259e-07, "loss": 1.236, "step": 4298 }, { "epoch": 0.9, "learning_rate": 9.533479967058867e-07, "loss": 1.2494, "step": 4299 }, { "epoch": 0.9, "learning_rate": 9.491954909656242e-07, "loss": 1.1863, "step": 4300 }, { "epoch": 0.9, "learning_rate": 9.450518286973542e-07, "loss": 1.2832, "step": 4301 }, { "epoch": 0.9, "learning_rate": 9.409170118245803e-07, "loss": 1.2413, "step": 4302 }, { "epoch": 0.91, "learning_rate": 9.36791042266707e-07, "loss": 1.2495, "step": 4303 }, { "epoch": 0.91, "learning_rate": 9.326739219390246e-07, "loss": 1.224, "step": 4304 }, { "epoch": 0.91, "learning_rate": 9.285656527527264e-07, "loss": 1.23, "step": 4305 }, { "epoch": 0.91, "learning_rate": 9.244662366148826e-07, "loss": 1.2561, "step": 4306 }, { "epoch": 0.91, "learning_rate": 9.203756754284665e-07, "loss": 1.2677, "step": 4307 }, { "epoch": 0.91, "learning_rate": 9.162939710923324e-07, "loss": 1.2469, "step": 4308 }, { "epoch": 0.91, "learning_rate": 9.122211255012292e-07, "loss": 1.1916, "step": 4309 }, { "epoch": 0.91, "learning_rate": 9.081571405457912e-07, "loss": 1.2088, "step": 4310 }, { "epoch": 0.91, "learning_rate": 9.041020181125315e-07, "loss": 1.2659, "step": 4311 }, { "epoch": 0.91, "learning_rate": 9.000557600838666e-07, "loss": 1.26, "step": 4312 }, { "epoch": 0.91, "learning_rate": 8.960183683380807e-07, "loss": 1.2366, "step": 4313 }, { "epoch": 0.91, "learning_rate": 8.919898447493569e-07, "loss": 1.2412, "step": 4314 }, { "epoch": 0.91, "learning_rate": 8.879701911877503e-07, "loss": 1.2653, "step": 4315 }, { "epoch": 0.91, "learning_rate": 8.839594095191995e-07, "loss": 1.1935, "step": 4316 }, { "epoch": 0.91, "learning_rate": 8.799575016055373e-07, "loss": 1.2208, "step": 4317 }, { "epoch": 0.91, "learning_rate": 8.7596446930446e-07, "loss": 1.2009, "step": 4318 }, { "epoch": 0.91, "learning_rate": 8.719803144695516e-07, "loss": 1.2297, "step": 4319 }, { "epoch": 0.91, "learning_rate": 8.680050389502814e-07, "loss": 1.258, "step": 4320 }, { "epoch": 0.91, "learning_rate": 8.640386445919847e-07, "loss": 1.2318, "step": 4321 }, { "epoch": 0.91, "learning_rate": 8.600811332358861e-07, "loss": 1.2014, "step": 4322 }, { "epoch": 0.91, "learning_rate": 8.561325067190762e-07, "loss": 1.2076, "step": 4323 }, { "epoch": 0.91, "learning_rate": 8.521927668745244e-07, "loss": 1.1566, "step": 4324 }, { "epoch": 0.91, "learning_rate": 8.482619155310812e-07, "loss": 1.2294, "step": 4325 }, { "epoch": 0.91, "learning_rate": 8.443399545134623e-07, "loss": 1.1682, "step": 4326 }, { "epoch": 0.91, "learning_rate": 8.404268856422626e-07, "loss": 1.2093, "step": 4327 }, { "epoch": 0.91, "learning_rate": 8.365227107339447e-07, "loss": 1.1701, "step": 4328 }, { "epoch": 0.91, "learning_rate": 8.326274316008475e-07, "loss": 1.2414, "step": 4329 }, { "epoch": 0.91, "learning_rate": 8.287410500511739e-07, "loss": 1.1983, "step": 4330 }, { "epoch": 0.91, "learning_rate": 8.248635678890049e-07, "loss": 1.214, "step": 4331 }, { "epoch": 0.91, "learning_rate": 8.209949869142808e-07, "loss": 1.2238, "step": 4332 }, { "epoch": 0.91, "learning_rate": 8.171353089228206e-07, "loss": 1.2027, "step": 4333 }, { "epoch": 0.91, "learning_rate": 8.132845357062979e-07, "loss": 1.2203, "step": 4334 }, { "epoch": 0.91, "learning_rate": 8.094426690522672e-07, "loss": 1.2504, "step": 4335 }, { "epoch": 0.91, "learning_rate": 8.056097107441352e-07, "loss": 1.2676, "step": 4336 }, { "epoch": 0.91, "learning_rate": 8.017856625611809e-07, "loss": 1.2207, "step": 4337 }, { "epoch": 0.91, "learning_rate": 7.979705262785442e-07, "loss": 1.2061, "step": 4338 }, { "epoch": 0.91, "learning_rate": 7.941643036672309e-07, "loss": 1.2271, "step": 4339 }, { "epoch": 0.91, "learning_rate": 7.903669964941052e-07, "loss": 1.2185, "step": 4340 }, { "epoch": 0.91, "learning_rate": 7.865786065218973e-07, "loss": 1.2584, "step": 4341 }, { "epoch": 0.91, "learning_rate": 7.827991355091891e-07, "loss": 1.2262, "step": 4342 }, { "epoch": 0.91, "learning_rate": 7.790285852104373e-07, "loss": 1.2416, "step": 4343 }, { "epoch": 0.91, "learning_rate": 7.752669573759464e-07, "loss": 1.2643, "step": 4344 }, { "epoch": 0.91, "learning_rate": 7.715142537518771e-07, "loss": 1.1716, "step": 4345 }, { "epoch": 0.91, "learning_rate": 7.677704760802562e-07, "loss": 1.2065, "step": 4346 }, { "epoch": 0.91, "learning_rate": 7.640356260989601e-07, "loss": 1.2274, "step": 4347 }, { "epoch": 0.91, "learning_rate": 7.603097055417242e-07, "loss": 1.24, "step": 4348 }, { "epoch": 0.91, "learning_rate": 7.565927161381403e-07, "loss": 1.2557, "step": 4349 }, { "epoch": 0.91, "learning_rate": 7.528846596136485e-07, "loss": 1.2253, "step": 4350 }, { "epoch": 0.92, "learning_rate": 7.491855376895519e-07, "loss": 1.2702, "step": 4351 }, { "epoch": 0.92, "learning_rate": 7.454953520829899e-07, "loss": 1.2337, "step": 4352 }, { "epoch": 0.92, "learning_rate": 7.418141045069727e-07, "loss": 1.2667, "step": 4353 }, { "epoch": 0.92, "learning_rate": 7.381417966703508e-07, "loss": 1.243, "step": 4354 }, { "epoch": 0.92, "learning_rate": 7.344784302778274e-07, "loss": 1.2792, "step": 4355 }, { "epoch": 0.92, "learning_rate": 7.308240070299489e-07, "loss": 1.206, "step": 4356 }, { "epoch": 0.92, "learning_rate": 7.271785286231204e-07, "loss": 1.2438, "step": 4357 }, { "epoch": 0.92, "learning_rate": 7.235419967495883e-07, "loss": 1.2203, "step": 4358 }, { "epoch": 0.92, "learning_rate": 7.199144130974489e-07, "loss": 1.2486, "step": 4359 }, { "epoch": 0.92, "learning_rate": 7.16295779350642e-07, "loss": 1.2434, "step": 4360 }, { "epoch": 0.92, "learning_rate": 7.126860971889527e-07, "loss": 1.2417, "step": 4361 }, { "epoch": 0.92, "learning_rate": 7.090853682880161e-07, "loss": 1.2403, "step": 4362 }, { "epoch": 0.92, "learning_rate": 7.054935943193042e-07, "loss": 1.2814, "step": 4363 }, { "epoch": 0.92, "learning_rate": 7.019107769501366e-07, "loss": 1.2591, "step": 4364 }, { "epoch": 0.92, "learning_rate": 6.983369178436739e-07, "loss": 1.3018, "step": 4365 }, { "epoch": 0.92, "learning_rate": 6.947720186589158e-07, "loss": 1.2351, "step": 4366 }, { "epoch": 0.92, "learning_rate": 6.912160810507096e-07, "loss": 1.2228, "step": 4367 }, { "epoch": 0.92, "learning_rate": 6.876691066697349e-07, "loss": 1.2691, "step": 4368 }, { "epoch": 0.92, "learning_rate": 6.841310971625103e-07, "loss": 1.2701, "step": 4369 }, { "epoch": 0.92, "learning_rate": 6.806020541714042e-07, "loss": 1.2439, "step": 4370 }, { "epoch": 0.92, "learning_rate": 6.770819793346084e-07, "loss": 1.2196, "step": 4371 }, { "epoch": 0.92, "learning_rate": 6.735708742861624e-07, "loss": 1.2263, "step": 4372 }, { "epoch": 0.92, "learning_rate": 6.700687406559359e-07, "loss": 1.2102, "step": 4373 }, { "epoch": 0.92, "learning_rate": 6.665755800696305e-07, "loss": 1.2618, "step": 4374 }, { "epoch": 0.92, "learning_rate": 6.63091394148796e-07, "loss": 1.1802, "step": 4375 }, { "epoch": 0.92, "learning_rate": 6.596161845108006e-07, "loss": 1.2205, "step": 4376 }, { "epoch": 0.92, "learning_rate": 6.561499527688586e-07, "loss": 1.1975, "step": 4377 }, { "epoch": 0.92, "learning_rate": 6.526927005320072e-07, "loss": 1.1822, "step": 4378 }, { "epoch": 0.92, "learning_rate": 6.492444294051204e-07, "loss": 1.2436, "step": 4379 }, { "epoch": 0.92, "learning_rate": 6.458051409889021e-07, "loss": 1.2051, "step": 4380 }, { "epoch": 0.92, "learning_rate": 6.423748368798843e-07, "loss": 1.2292, "step": 4381 }, { "epoch": 0.92, "learning_rate": 6.38953518670431e-07, "loss": 1.2116, "step": 4382 }, { "epoch": 0.92, "learning_rate": 6.355411879487339e-07, "loss": 1.2199, "step": 4383 }, { "epoch": 0.92, "learning_rate": 6.321378462988148e-07, "loss": 1.2384, "step": 4384 }, { "epoch": 0.92, "learning_rate": 6.287434953005145e-07, "loss": 1.2618, "step": 4385 }, { "epoch": 0.92, "learning_rate": 6.253581365295148e-07, "loss": 1.2689, "step": 4386 }, { "epoch": 0.92, "learning_rate": 6.219817715573073e-07, "loss": 1.2113, "step": 4387 }, { "epoch": 0.92, "learning_rate": 6.18614401951223e-07, "loss": 1.2616, "step": 4388 }, { "epoch": 0.92, "learning_rate": 6.152560292744091e-07, "loss": 1.2255, "step": 4389 }, { "epoch": 0.92, "learning_rate": 6.119066550858321e-07, "loss": 1.2419, "step": 4390 }, { "epoch": 0.92, "learning_rate": 6.085662809402926e-07, "loss": 1.2623, "step": 4391 }, { "epoch": 0.92, "learning_rate": 6.052349083884057e-07, "loss": 1.2152, "step": 4392 }, { "epoch": 0.92, "learning_rate": 6.019125389766123e-07, "loss": 1.2353, "step": 4393 }, { "epoch": 0.92, "learning_rate": 5.985991742471698e-07, "loss": 1.2339, "step": 4394 }, { "epoch": 0.92, "learning_rate": 5.952948157381566e-07, "loss": 1.1722, "step": 4395 }, { "epoch": 0.92, "learning_rate": 5.919994649834748e-07, "loss": 1.2445, "step": 4396 }, { "epoch": 0.92, "learning_rate": 5.887131235128385e-07, "loss": 1.2186, "step": 4397 }, { "epoch": 0.93, "learning_rate": 5.854357928517806e-07, "loss": 1.2297, "step": 4398 }, { "epoch": 0.93, "learning_rate": 5.821674745216599e-07, "loss": 1.2235, "step": 4399 }, { "epoch": 0.93, "learning_rate": 5.789081700396381e-07, "loss": 1.1534, "step": 4400 }, { "epoch": 0.93, "learning_rate": 5.756578809187008e-07, "loss": 1.2411, "step": 4401 }, { "epoch": 0.93, "learning_rate": 5.724166086676542e-07, "loss": 1.2196, "step": 4402 }, { "epoch": 0.93, "learning_rate": 5.691843547911013e-07, "loss": 1.2477, "step": 4403 }, { "epoch": 0.93, "learning_rate": 5.6596112078948e-07, "loss": 1.216, "step": 4404 }, { "epoch": 0.93, "learning_rate": 5.627469081590242e-07, "loss": 1.2527, "step": 4405 }, { "epoch": 0.93, "learning_rate": 5.595417183917851e-07, "loss": 1.2296, "step": 4406 }, { "epoch": 0.93, "learning_rate": 5.563455529756301e-07, "loss": 1.2075, "step": 4407 }, { "epoch": 0.93, "learning_rate": 5.531584133942325e-07, "loss": 1.2425, "step": 4408 }, { "epoch": 0.93, "learning_rate": 5.499803011270776e-07, "loss": 1.2671, "step": 4409 }, { "epoch": 0.93, "learning_rate": 5.468112176494633e-07, "loss": 1.2547, "step": 4410 }, { "epoch": 0.93, "learning_rate": 5.43651164432486e-07, "loss": 1.2541, "step": 4411 }, { "epoch": 0.93, "learning_rate": 5.405001429430634e-07, "loss": 1.195, "step": 4412 }, { "epoch": 0.93, "learning_rate": 5.373581546439077e-07, "loss": 1.2237, "step": 4413 }, { "epoch": 0.93, "learning_rate": 5.342252009935522e-07, "loss": 1.2363, "step": 4414 }, { "epoch": 0.93, "learning_rate": 5.311012834463247e-07, "loss": 1.2104, "step": 4415 }, { "epoch": 0.93, "learning_rate": 5.279864034523586e-07, "loss": 1.2381, "step": 4416 }, { "epoch": 0.93, "learning_rate": 5.248805624576037e-07, "loss": 1.2374, "step": 4417 }, { "epoch": 0.93, "learning_rate": 5.217837619038002e-07, "loss": 1.2506, "step": 4418 }, { "epoch": 0.93, "learning_rate": 5.186960032284983e-07, "loss": 1.2166, "step": 4419 }, { "epoch": 0.93, "learning_rate": 5.156172878650489e-07, "loss": 1.2131, "step": 4420 }, { "epoch": 0.93, "learning_rate": 5.125476172426092e-07, "loss": 1.2366, "step": 4421 }, { "epoch": 0.93, "learning_rate": 5.094869927861323e-07, "loss": 1.2112, "step": 4422 }, { "epoch": 0.93, "learning_rate": 5.064354159163754e-07, "loss": 1.2071, "step": 4423 }, { "epoch": 0.93, "learning_rate": 5.033928880498917e-07, "loss": 1.3022, "step": 4424 }, { "epoch": 0.93, "learning_rate": 5.003594105990384e-07, "loss": 1.21, "step": 4425 }, { "epoch": 0.93, "learning_rate": 4.973349849719733e-07, "loss": 1.1817, "step": 4426 }, { "epoch": 0.93, "learning_rate": 4.943196125726446e-07, "loss": 1.2437, "step": 4427 }, { "epoch": 0.93, "learning_rate": 4.913132948008037e-07, "loss": 1.1759, "step": 4428 }, { "epoch": 0.93, "learning_rate": 4.883160330519965e-07, "loss": 1.2422, "step": 4429 }, { "epoch": 0.93, "learning_rate": 4.853278287175677e-07, "loss": 1.2204, "step": 4430 }, { "epoch": 0.93, "learning_rate": 4.823486831846547e-07, "loss": 1.2376, "step": 4431 }, { "epoch": 0.93, "learning_rate": 4.793785978361887e-07, "loss": 1.2706, "step": 4432 }, { "epoch": 0.93, "learning_rate": 4.764175740509025e-07, "loss": 1.2328, "step": 4433 }, { "epoch": 0.93, "learning_rate": 4.7346561320330997e-07, "loss": 1.2333, "step": 4434 }, { "epoch": 0.93, "learning_rate": 4.7052271666373053e-07, "loss": 1.1884, "step": 4435 }, { "epoch": 0.93, "learning_rate": 4.675888857982669e-07, "loss": 1.2321, "step": 4436 }, { "epoch": 0.93, "learning_rate": 4.646641219688186e-07, "loss": 1.2524, "step": 4437 }, { "epoch": 0.93, "learning_rate": 4.617484265330752e-07, "loss": 1.2138, "step": 4438 }, { "epoch": 0.93, "learning_rate": 4.588418008445161e-07, "loss": 1.2546, "step": 4439 }, { "epoch": 0.93, "learning_rate": 4.5594424625240887e-07, "loss": 1.2357, "step": 4440 }, { "epoch": 0.93, "learning_rate": 4.5305576410181293e-07, "loss": 1.2278, "step": 4441 }, { "epoch": 0.93, "learning_rate": 4.5017635573357366e-07, "loss": 1.2714, "step": 4442 }, { "epoch": 0.93, "learning_rate": 4.4730602248432843e-07, "loss": 1.2129, "step": 4443 }, { "epoch": 0.93, "learning_rate": 4.444447656864981e-07, "loss": 1.2472, "step": 4444 }, { "epoch": 0.93, "learning_rate": 4.4159258666828907e-07, "loss": 1.2369, "step": 4445 }, { "epoch": 0.94, "learning_rate": 4.3874948675370233e-07, "loss": 1.2305, "step": 4446 }, { "epoch": 0.94, "learning_rate": 4.3591546726250877e-07, "loss": 1.234, "step": 4447 }, { "epoch": 0.94, "learning_rate": 4.3309052951028275e-07, "loss": 1.2171, "step": 4448 }, { "epoch": 0.94, "learning_rate": 4.302746748083664e-07, "loss": 1.1726, "step": 4449 }, { "epoch": 0.94, "learning_rate": 4.2746790446389853e-07, "loss": 1.2404, "step": 4450 }, { "epoch": 0.94, "learning_rate": 4.2467021977978806e-07, "loss": 1.1928, "step": 4451 }, { "epoch": 0.94, "learning_rate": 4.218816220547406e-07, "loss": 1.2398, "step": 4452 }, { "epoch": 0.94, "learning_rate": 4.1910211258322954e-07, "loss": 1.2147, "step": 4453 }, { "epoch": 0.94, "learning_rate": 4.1633169265552274e-07, "loss": 1.2187, "step": 4454 }, { "epoch": 0.94, "learning_rate": 4.13570363557656e-07, "loss": 1.2409, "step": 4455 }, { "epoch": 0.94, "learning_rate": 4.108181265714528e-07, "loss": 1.2315, "step": 4456 }, { "epoch": 0.94, "learning_rate": 4.0807498297451786e-07, "loss": 1.2053, "step": 4457 }, { "epoch": 0.94, "learning_rate": 4.053409340402259e-07, "loss": 1.1859, "step": 4458 }, { "epoch": 0.94, "learning_rate": 4.026159810377417e-07, "loss": 1.1849, "step": 4459 }, { "epoch": 0.94, "learning_rate": 3.999001252319934e-07, "loss": 1.2131, "step": 4460 }, { "epoch": 0.94, "learning_rate": 3.971933678836992e-07, "loss": 1.283, "step": 4461 }, { "epoch": 0.94, "learning_rate": 3.944957102493474e-07, "loss": 1.2054, "step": 4462 }, { "epoch": 0.94, "learning_rate": 3.918071535812007e-07, "loss": 1.2272, "step": 4463 }, { "epoch": 0.94, "learning_rate": 3.8912769912730297e-07, "loss": 1.2537, "step": 4464 }, { "epoch": 0.94, "learning_rate": 3.864573481314682e-07, "loss": 1.2086, "step": 4465 }, { "epoch": 0.94, "learning_rate": 3.837961018332825e-07, "loss": 1.2417, "step": 4466 }, { "epoch": 0.94, "learning_rate": 3.811439614681156e-07, "loss": 1.2355, "step": 4467 }, { "epoch": 0.94, "learning_rate": 3.7850092826709817e-07, "loss": 1.2388, "step": 4468 }, { "epoch": 0.94, "learning_rate": 3.758670034571399e-07, "loss": 1.222, "step": 4469 }, { "epoch": 0.94, "learning_rate": 3.7324218826092053e-07, "loss": 1.1949, "step": 4470 }, { "epoch": 0.94, "learning_rate": 3.7062648389689204e-07, "loss": 1.2616, "step": 4471 }, { "epoch": 0.94, "learning_rate": 3.680198915792765e-07, "loss": 1.2673, "step": 4472 }, { "epoch": 0.94, "learning_rate": 3.654224125180661e-07, "loss": 1.1847, "step": 4473 }, { "epoch": 0.94, "learning_rate": 3.628340479190229e-07, "loss": 1.253, "step": 4474 }, { "epoch": 0.94, "learning_rate": 3.602547989836769e-07, "loss": 1.2759, "step": 4475 }, { "epoch": 0.94, "learning_rate": 3.5768466690933036e-07, "loss": 1.2087, "step": 4476 }, { "epoch": 0.94, "learning_rate": 3.551236528890445e-07, "loss": 1.2671, "step": 4477 }, { "epoch": 0.94, "learning_rate": 3.5257175811166166e-07, "loss": 1.2362, "step": 4478 }, { "epoch": 0.94, "learning_rate": 3.500289837617765e-07, "loss": 1.2305, "step": 4479 }, { "epoch": 0.94, "learning_rate": 3.474953310197604e-07, "loss": 1.2287, "step": 4480 }, { "epoch": 0.94, "learning_rate": 3.4497080106174806e-07, "loss": 1.2224, "step": 4481 }, { "epoch": 0.94, "learning_rate": 3.424553950596332e-07, "loss": 1.2744, "step": 4482 }, { "epoch": 0.94, "learning_rate": 3.3994911418108176e-07, "loss": 1.2061, "step": 4483 }, { "epoch": 0.94, "learning_rate": 3.374519595895209e-07, "loss": 1.2135, "step": 4484 }, { "epoch": 0.94, "learning_rate": 3.3496393244414114e-07, "loss": 1.2378, "step": 4485 }, { "epoch": 0.94, "learning_rate": 3.324850338998964e-07, "loss": 1.244, "step": 4486 }, { "epoch": 0.94, "learning_rate": 3.300152651075039e-07, "loss": 1.2142, "step": 4487 }, { "epoch": 0.94, "learning_rate": 3.2755462721344e-07, "loss": 1.2057, "step": 4488 }, { "epoch": 0.94, "learning_rate": 3.251031213599465e-07, "loss": 1.2561, "step": 4489 }, { "epoch": 0.94, "learning_rate": 3.2266074868501976e-07, "loss": 1.2426, "step": 4490 }, { "epoch": 0.94, "learning_rate": 3.2022751032242396e-07, "loss": 1.2202, "step": 4491 }, { "epoch": 0.94, "learning_rate": 3.178034074016778e-07, "loss": 1.2431, "step": 4492 }, { "epoch": 0.95, "learning_rate": 3.1538844104806343e-07, "loss": 1.2423, "step": 4493 }, { "epoch": 0.95, "learning_rate": 3.1298261238261964e-07, "loss": 1.2351, "step": 4494 }, { "epoch": 0.95, "learning_rate": 3.105859225221397e-07, "loss": 1.2049, "step": 4495 }, { "epoch": 0.95, "learning_rate": 3.0819837257918037e-07, "loss": 1.2546, "step": 4496 }, { "epoch": 0.95, "learning_rate": 3.058199636620529e-07, "loss": 1.1829, "step": 4497 }, { "epoch": 0.95, "learning_rate": 3.034506968748274e-07, "loss": 1.2594, "step": 4498 }, { "epoch": 0.95, "learning_rate": 3.010905733173264e-07, "loss": 1.2462, "step": 4499 }, { "epoch": 0.95, "learning_rate": 2.987395940851312e-07, "loss": 1.239, "step": 4500 }, { "epoch": 0.95, "learning_rate": 2.9639776026957777e-07, "loss": 1.2714, "step": 4501 }, { "epoch": 0.95, "learning_rate": 2.9406507295775657e-07, "loss": 1.2536, "step": 4502 }, { "epoch": 0.95, "learning_rate": 2.91741533232508e-07, "loss": 1.2272, "step": 4503 }, { "epoch": 0.95, "learning_rate": 2.894271421724359e-07, "loss": 1.2041, "step": 4504 }, { "epoch": 0.95, "learning_rate": 2.871219008518877e-07, "loss": 1.2234, "step": 4505 }, { "epoch": 0.95, "learning_rate": 2.8482581034096733e-07, "loss": 1.2461, "step": 4506 }, { "epoch": 0.95, "learning_rate": 2.825388717055311e-07, "loss": 1.2007, "step": 4507 }, { "epoch": 0.95, "learning_rate": 2.8026108600718746e-07, "loss": 1.2223, "step": 4508 }, { "epoch": 0.95, "learning_rate": 2.7799245430329526e-07, "loss": 1.2345, "step": 4509 }, { "epoch": 0.95, "learning_rate": 2.7573297764696085e-07, "loss": 1.2377, "step": 4510 }, { "epoch": 0.95, "learning_rate": 2.7348265708704745e-07, "loss": 1.1932, "step": 4511 }, { "epoch": 0.95, "learning_rate": 2.7124149366816177e-07, "loss": 1.2411, "step": 4512 }, { "epoch": 0.95, "learning_rate": 2.690094884306649e-07, "loss": 1.2273, "step": 4513 }, { "epoch": 0.95, "learning_rate": 2.667866424106591e-07, "loss": 1.2573, "step": 4514 }, { "epoch": 0.95, "learning_rate": 2.6457295664000573e-07, "loss": 1.2285, "step": 4515 }, { "epoch": 0.95, "learning_rate": 2.623684321463049e-07, "loss": 1.2345, "step": 4516 }, { "epoch": 0.95, "learning_rate": 2.6017306995290926e-07, "loss": 1.2686, "step": 4517 }, { "epoch": 0.95, "learning_rate": 2.579868710789124e-07, "loss": 1.2229, "step": 4518 }, { "epoch": 0.95, "learning_rate": 2.5580983653916035e-07, "loss": 1.2112, "step": 4519 }, { "epoch": 0.95, "learning_rate": 2.5364196734424475e-07, "loss": 1.2086, "step": 4520 }, { "epoch": 0.95, "learning_rate": 2.514832645004939e-07, "loss": 1.2545, "step": 4521 }, { "epoch": 0.95, "learning_rate": 2.493337290099973e-07, "loss": 1.2147, "step": 4522 }, { "epoch": 0.95, "learning_rate": 2.471933618705702e-07, "loss": 1.2331, "step": 4523 }, { "epoch": 0.95, "learning_rate": 2.4506216407578665e-07, "loss": 1.2037, "step": 4524 }, { "epoch": 0.95, "learning_rate": 2.429401366149553e-07, "loss": 1.2079, "step": 4525 }, { "epoch": 0.95, "learning_rate": 2.4082728047313487e-07, "loss": 1.2442, "step": 4526 }, { "epoch": 0.95, "learning_rate": 2.3872359663111856e-07, "loss": 1.2323, "step": 4527 }, { "epoch": 0.95, "learning_rate": 2.3662908606544964e-07, "loss": 1.2278, "step": 4528 }, { "epoch": 0.95, "learning_rate": 2.34543749748406e-07, "loss": 1.279, "step": 4529 }, { "epoch": 0.95, "learning_rate": 2.3246758864801544e-07, "loss": 1.2132, "step": 4530 }, { "epoch": 0.95, "learning_rate": 2.304006037280404e-07, "loss": 1.2389, "step": 4531 }, { "epoch": 0.95, "learning_rate": 2.2834279594798002e-07, "loss": 1.2062, "step": 4532 }, { "epoch": 0.95, "learning_rate": 2.2629416626308353e-07, "loss": 1.1694, "step": 4533 }, { "epoch": 0.95, "learning_rate": 2.2425471562433466e-07, "loss": 1.2011, "step": 4534 }, { "epoch": 0.95, "learning_rate": 2.222244449784494e-07, "loss": 1.1841, "step": 4535 }, { "epoch": 0.95, "learning_rate": 2.2020335526789616e-07, "loss": 1.2362, "step": 4536 }, { "epoch": 0.95, "learning_rate": 2.1819144743086883e-07, "loss": 1.2204, "step": 4537 }, { "epoch": 0.95, "learning_rate": 2.1618872240130928e-07, "loss": 1.2544, "step": 4538 }, { "epoch": 0.95, "learning_rate": 2.1419518110888938e-07, "loss": 1.2338, "step": 4539 }, { "epoch": 0.95, "learning_rate": 2.1221082447901774e-07, "loss": 1.2449, "step": 4540 }, { "epoch": 0.96, "learning_rate": 2.1023565343284425e-07, "loss": 1.2354, "step": 4541 }, { "epoch": 0.96, "learning_rate": 2.082696688872554e-07, "loss": 1.2078, "step": 4542 }, { "epoch": 0.96, "learning_rate": 2.063128717548657e-07, "loss": 1.2149, "step": 4543 }, { "epoch": 0.96, "learning_rate": 2.043652629440307e-07, "loss": 1.2556, "step": 4544 }, { "epoch": 0.96, "learning_rate": 2.0242684335884056e-07, "loss": 1.2607, "step": 4545 }, { "epoch": 0.96, "learning_rate": 2.0049761389911772e-07, "loss": 1.2898, "step": 4546 }, { "epoch": 0.96, "learning_rate": 1.9857757546041912e-07, "loss": 1.2345, "step": 4547 }, { "epoch": 0.96, "learning_rate": 1.9666672893403627e-07, "loss": 1.2257, "step": 4548 }, { "epoch": 0.96, "learning_rate": 1.9476507520699518e-07, "loss": 1.2238, "step": 4549 }, { "epoch": 0.96, "learning_rate": 1.928726151620497e-07, "loss": 1.2325, "step": 4550 }, { "epoch": 0.96, "learning_rate": 1.9098934967768823e-07, "loss": 1.2727, "step": 4551 }, { "epoch": 0.96, "learning_rate": 1.891152796281337e-07, "loss": 1.2301, "step": 4552 }, { "epoch": 0.96, "learning_rate": 1.8725040588333466e-07, "loss": 1.1936, "step": 4553 }, { "epoch": 0.96, "learning_rate": 1.853947293089764e-07, "loss": 1.2217, "step": 4554 }, { "epoch": 0.96, "learning_rate": 1.8354825076647432e-07, "loss": 1.2005, "step": 4555 }, { "epoch": 0.96, "learning_rate": 1.817109711129672e-07, "loss": 1.1929, "step": 4556 }, { "epoch": 0.96, "learning_rate": 1.7988289120133507e-07, "loss": 1.2421, "step": 4557 }, { "epoch": 0.96, "learning_rate": 1.7806401188017463e-07, "loss": 1.2203, "step": 4558 }, { "epoch": 0.96, "learning_rate": 1.7625433399382386e-07, "loss": 1.2239, "step": 4559 }, { "epoch": 0.96, "learning_rate": 1.7445385838234185e-07, "loss": 1.223, "step": 4560 }, { "epoch": 0.96, "learning_rate": 1.7266258588151562e-07, "loss": 1.1813, "step": 4561 }, { "epoch": 0.96, "learning_rate": 1.7088051732286448e-07, "loss": 1.2105, "step": 4562 }, { "epoch": 0.96, "learning_rate": 1.6910765353363334e-07, "loss": 1.2597, "step": 4563 }, { "epoch": 0.96, "learning_rate": 1.6734399533679057e-07, "loss": 1.237, "step": 4564 }, { "epoch": 0.96, "learning_rate": 1.6558954355103686e-07, "loss": 1.2297, "step": 4565 }, { "epoch": 0.96, "learning_rate": 1.6384429899079624e-07, "loss": 1.2204, "step": 4566 }, { "epoch": 0.96, "learning_rate": 1.6210826246622068e-07, "loss": 1.1876, "step": 4567 }, { "epoch": 0.96, "learning_rate": 1.603814347831856e-07, "loss": 1.2152, "step": 4568 }, { "epoch": 0.96, "learning_rate": 1.586638167432919e-07, "loss": 1.2398, "step": 4569 }, { "epoch": 0.96, "learning_rate": 1.5695540914386632e-07, "loss": 1.2265, "step": 4570 }, { "epoch": 0.96, "learning_rate": 1.552562127779611e-07, "loss": 1.1975, "step": 4571 }, { "epoch": 0.96, "learning_rate": 1.5356622843434533e-07, "loss": 1.1959, "step": 4572 }, { "epoch": 0.96, "learning_rate": 1.51885456897527e-07, "loss": 1.2192, "step": 4573 }, { "epoch": 0.96, "learning_rate": 1.5021389894771753e-07, "loss": 1.2153, "step": 4574 }, { "epoch": 0.96, "learning_rate": 1.4855155536087184e-07, "loss": 1.2368, "step": 4575 }, { "epoch": 0.96, "learning_rate": 1.4689842690865042e-07, "loss": 1.2154, "step": 4576 }, { "epoch": 0.96, "learning_rate": 1.4525451435844608e-07, "loss": 1.2416, "step": 4577 }, { "epoch": 0.96, "learning_rate": 1.436198184733706e-07, "loss": 1.1973, "step": 4578 }, { "epoch": 0.96, "learning_rate": 1.4199434001225697e-07, "loss": 1.2245, "step": 4579 }, { "epoch": 0.96, "learning_rate": 1.4037807972966167e-07, "loss": 1.2799, "step": 4580 }, { "epoch": 0.96, "learning_rate": 1.387710383758556e-07, "loss": 1.2327, "step": 4581 }, { "epoch": 0.96, "learning_rate": 1.3717321669683981e-07, "loss": 1.2359, "step": 4582 }, { "epoch": 0.96, "learning_rate": 1.3558461543432767e-07, "loss": 1.2137, "step": 4583 }, { "epoch": 0.96, "learning_rate": 1.3400523532575592e-07, "loss": 1.2329, "step": 4584 }, { "epoch": 0.96, "learning_rate": 1.324350771042804e-07, "loss": 1.2091, "step": 4585 }, { "epoch": 0.96, "learning_rate": 1.3087414149877574e-07, "loss": 1.2304, "step": 4586 }, { "epoch": 0.96, "learning_rate": 1.2932242923383575e-07, "loss": 1.2547, "step": 4587 }, { "epoch": 0.97, "learning_rate": 1.277799410297731e-07, "loss": 1.2672, "step": 4588 }, { "epoch": 0.97, "learning_rate": 1.26246677602615e-07, "loss": 1.2167, "step": 4589 }, { "epoch": 0.97, "learning_rate": 1.2472263966411214e-07, "loss": 1.2341, "step": 4590 }, { "epoch": 0.97, "learning_rate": 1.2320782792173192e-07, "loss": 1.2091, "step": 4591 }, { "epoch": 0.97, "learning_rate": 1.2170224307865185e-07, "loss": 1.2378, "step": 4592 }, { "epoch": 0.97, "learning_rate": 1.2020588583377513e-07, "loss": 1.2021, "step": 4593 }, { "epoch": 0.97, "learning_rate": 1.187187568817172e-07, "loss": 1.2016, "step": 4594 }, { "epoch": 0.97, "learning_rate": 1.1724085691280806e-07, "loss": 1.2125, "step": 4595 }, { "epoch": 0.97, "learning_rate": 1.1577218661309896e-07, "loss": 1.2236, "step": 4596 }, { "epoch": 0.97, "learning_rate": 1.1431274666435121e-07, "loss": 1.161, "step": 4597 }, { "epoch": 0.97, "learning_rate": 1.1286253774404288e-07, "loss": 1.2346, "step": 4598 }, { "epoch": 0.97, "learning_rate": 1.11421560525371e-07, "loss": 1.2394, "step": 4599 }, { "epoch": 0.97, "learning_rate": 1.0998981567724276e-07, "loss": 1.2812, "step": 4600 }, { "epoch": 0.97, "learning_rate": 1.0856730386427983e-07, "loss": 1.2264, "step": 4601 }, { "epoch": 0.97, "learning_rate": 1.0715402574681843e-07, "loss": 1.2406, "step": 4602 }, { "epoch": 0.97, "learning_rate": 1.0574998198090935e-07, "loss": 1.2552, "step": 4603 }, { "epoch": 0.97, "learning_rate": 1.0435517321831568e-07, "loss": 1.1945, "step": 4604 }, { "epoch": 0.97, "learning_rate": 1.0296960010651725e-07, "loss": 1.1916, "step": 4605 }, { "epoch": 0.97, "learning_rate": 1.0159326328869734e-07, "loss": 1.1895, "step": 4606 }, { "epoch": 0.97, "learning_rate": 1.0022616340376489e-07, "loss": 1.2183, "step": 4607 }, { "epoch": 0.97, "learning_rate": 9.886830108632784e-08, "loss": 1.209, "step": 4608 }, { "epoch": 0.97, "learning_rate": 9.751967696671749e-08, "loss": 1.2673, "step": 4609 }, { "epoch": 0.97, "learning_rate": 9.618029167096865e-08, "loss": 1.271, "step": 4610 }, { "epoch": 0.97, "learning_rate": 9.485014582083063e-08, "loss": 1.1878, "step": 4611 }, { "epoch": 0.97, "learning_rate": 9.352924003376285e-08, "loss": 1.2163, "step": 4612 }, { "epoch": 0.97, "learning_rate": 9.221757492293704e-08, "loss": 1.2079, "step": 4613 }, { "epoch": 0.97, "learning_rate": 9.091515109723281e-08, "loss": 1.2322, "step": 4614 }, { "epoch": 0.97, "learning_rate": 8.96219691612421e-08, "loss": 1.213, "step": 4615 }, { "epoch": 0.97, "learning_rate": 8.833802971526472e-08, "loss": 1.2398, "step": 4616 }, { "epoch": 0.97, "learning_rate": 8.706333335531503e-08, "loss": 1.2366, "step": 4617 }, { "epoch": 0.97, "learning_rate": 8.579788067310858e-08, "loss": 1.2867, "step": 4618 }, { "epoch": 0.97, "learning_rate": 8.454167225607768e-08, "loss": 1.2004, "step": 4619 }, { "epoch": 0.97, "learning_rate": 8.32947086873559e-08, "loss": 1.235, "step": 4620 }, { "epoch": 0.97, "learning_rate": 8.205699054579575e-08, "loss": 1.2393, "step": 4621 }, { "epoch": 0.97, "learning_rate": 8.082851840594652e-08, "loss": 1.2185, "step": 4622 }, { "epoch": 0.97, "learning_rate": 7.960929283807429e-08, "loss": 1.2763, "step": 4623 }, { "epoch": 0.97, "learning_rate": 7.83993144081463e-08, "loss": 1.1988, "step": 4624 }, { "epoch": 0.97, "learning_rate": 7.719858367784216e-08, "loss": 1.2469, "step": 4625 }, { "epoch": 0.97, "learning_rate": 7.600710120454491e-08, "loss": 1.2057, "step": 4626 }, { "epoch": 0.97, "learning_rate": 7.482486754134765e-08, "loss": 1.2969, "step": 4627 }, { "epoch": 0.97, "learning_rate": 7.365188323704919e-08, "loss": 1.2414, "step": 4628 }, { "epoch": 0.97, "learning_rate": 7.248814883615174e-08, "loss": 1.2522, "step": 4629 }, { "epoch": 0.97, "learning_rate": 7.133366487886762e-08, "loss": 1.2131, "step": 4630 }, { "epoch": 0.97, "learning_rate": 7.018843190111479e-08, "loss": 1.2519, "step": 4631 }, { "epoch": 0.97, "learning_rate": 6.90524504345147e-08, "loss": 1.2755, "step": 4632 }, { "epoch": 0.97, "learning_rate": 6.792572100639661e-08, "loss": 1.2914, "step": 4633 }, { "epoch": 0.97, "learning_rate": 6.680824413979103e-08, "loss": 1.2579, "step": 4634 }, { "epoch": 0.97, "learning_rate": 6.570002035343636e-08, "loss": 1.2256, "step": 4635 }, { "epoch": 0.98, "learning_rate": 6.460105016177887e-08, "loss": 1.2272, "step": 4636 }, { "epoch": 0.98, "learning_rate": 6.351133407495936e-08, "loss": 1.2768, "step": 4637 }, { "epoch": 0.98, "learning_rate": 6.24308725988354e-08, "loss": 1.1901, "step": 4638 }, { "epoch": 0.98, "learning_rate": 6.135966623495915e-08, "loss": 1.2421, "step": 4639 }, { "epoch": 0.98, "learning_rate": 6.029771548058838e-08, "loss": 1.2004, "step": 4640 }, { "epoch": 0.98, "learning_rate": 5.924502082868655e-08, "loss": 1.2288, "step": 4641 }, { "epoch": 0.98, "learning_rate": 5.820158276792054e-08, "loss": 1.2435, "step": 4642 }, { "epoch": 0.98, "learning_rate": 5.716740178266067e-08, "loss": 1.2091, "step": 4643 }, { "epoch": 0.98, "learning_rate": 5.614247835297404e-08, "loss": 1.1938, "step": 4644 }, { "epoch": 0.98, "learning_rate": 5.512681295463784e-08, "loss": 1.2725, "step": 4645 }, { "epoch": 0.98, "learning_rate": 5.4120406059128274e-08, "loss": 1.2059, "step": 4646 }, { "epoch": 0.98, "learning_rate": 5.312325813362274e-08, "loss": 1.2319, "step": 4647 }, { "epoch": 0.98, "learning_rate": 5.21353696410043e-08, "loss": 1.1882, "step": 4648 }, { "epoch": 0.98, "learning_rate": 5.11567410398528e-08, "loss": 1.221, "step": 4649 }, { "epoch": 0.98, "learning_rate": 5.0187372784453734e-08, "loss": 1.2138, "step": 4650 }, { "epoch": 0.98, "learning_rate": 4.922726532479383e-08, "loss": 1.2108, "step": 4651 }, { "epoch": 0.98, "learning_rate": 4.827641910655656e-08, "loss": 1.2147, "step": 4652 }, { "epoch": 0.98, "learning_rate": 4.7334834571128866e-08, "loss": 1.2249, "step": 4653 }, { "epoch": 0.98, "learning_rate": 4.640251215560332e-08, "loss": 1.1814, "step": 4654 }, { "epoch": 0.98, "learning_rate": 4.547945229276263e-08, "loss": 1.2164, "step": 4655 }, { "epoch": 0.98, "learning_rate": 4.456565541109958e-08, "loss": 1.187, "step": 4656 }, { "epoch": 0.98, "learning_rate": 4.366112193480154e-08, "loss": 1.1888, "step": 4657 }, { "epoch": 0.98, "learning_rate": 4.276585228375485e-08, "loss": 1.2154, "step": 4658 }, { "epoch": 0.98, "learning_rate": 4.187984687355151e-08, "loss": 1.1806, "step": 4659 }, { "epoch": 0.98, "learning_rate": 4.100310611547809e-08, "loss": 1.2458, "step": 4660 }, { "epoch": 0.98, "learning_rate": 4.013563041652013e-08, "loss": 1.2249, "step": 4661 }, { "epoch": 0.98, "learning_rate": 3.927742017936664e-08, "loss": 1.2553, "step": 4662 }, { "epoch": 0.98, "learning_rate": 3.8428475802398944e-08, "loss": 1.2188, "step": 4663 }, { "epoch": 0.98, "learning_rate": 3.7588797679706245e-08, "loss": 1.2438, "step": 4664 }, { "epoch": 0.98, "learning_rate": 3.6758386201065645e-08, "loss": 1.2461, "step": 4665 }, { "epoch": 0.98, "learning_rate": 3.5937241751962115e-08, "loss": 1.2565, "step": 4666 }, { "epoch": 0.98, "learning_rate": 3.5125364713572976e-08, "loss": 1.2366, "step": 4667 }, { "epoch": 0.98, "learning_rate": 3.4322755462774525e-08, "loss": 1.1382, "step": 4668 }, { "epoch": 0.98, "learning_rate": 3.3529414372142074e-08, "loss": 1.2868, "step": 4669 }, { "epoch": 0.98, "learning_rate": 3.2745341809949923e-08, "loss": 1.233, "step": 4670 }, { "epoch": 0.98, "learning_rate": 3.1970538140166927e-08, "loss": 1.2067, "step": 4671 }, { "epoch": 0.98, "learning_rate": 3.1205003722460935e-08, "loss": 1.2252, "step": 4672 }, { "epoch": 0.98, "learning_rate": 3.0448738912196574e-08, "loss": 1.2462, "step": 4673 }, { "epoch": 0.98, "learning_rate": 2.9701744060435246e-08, "loss": 1.2097, "step": 4674 }, { "epoch": 0.98, "learning_rate": 2.8964019513935126e-08, "loss": 1.2165, "step": 4675 }, { "epoch": 0.98, "learning_rate": 2.8235565615151172e-08, "loss": 1.1683, "step": 4676 }, { "epoch": 0.98, "learning_rate": 2.7516382702235112e-08, "loss": 1.1979, "step": 4677 }, { "epoch": 0.98, "learning_rate": 2.6806471109037668e-08, "loss": 1.2402, "step": 4678 }, { "epoch": 0.98, "learning_rate": 2.6105831165099683e-08, "loss": 1.2255, "step": 4679 }, { "epoch": 0.98, "learning_rate": 2.541446319566321e-08, "loss": 1.2091, "step": 4680 }, { "epoch": 0.98, "learning_rate": 2.473236752166264e-08, "loss": 1.209, "step": 4681 }, { "epoch": 0.98, "learning_rate": 2.4059544459731356e-08, "loss": 1.1749, "step": 4682 }, { "epoch": 0.99, "learning_rate": 2.3395994322199522e-08, "loss": 1.18, "step": 4683 }, { "epoch": 0.99, "learning_rate": 2.2741717417085196e-08, "loss": 1.2378, "step": 4684 }, { "epoch": 0.99, "learning_rate": 2.2096714048109867e-08, "loss": 1.2202, "step": 4685 }, { "epoch": 0.99, "learning_rate": 2.1460984514685145e-08, "loss": 1.2915, "step": 4686 }, { "epoch": 0.99, "learning_rate": 2.083452911192163e-08, "loss": 1.2164, "step": 4687 }, { "epoch": 0.99, "learning_rate": 2.021734813062226e-08, "loss": 1.2087, "step": 4688 }, { "epoch": 0.99, "learning_rate": 1.960944185728675e-08, "loss": 1.1995, "step": 4689 }, { "epoch": 0.99, "learning_rate": 1.9010810574102702e-08, "loss": 1.1979, "step": 4690 }, { "epoch": 0.99, "learning_rate": 1.842145455896338e-08, "loss": 1.2474, "step": 4691 }, { "epoch": 0.99, "learning_rate": 1.7841374085447728e-08, "loss": 1.2469, "step": 4692 }, { "epoch": 0.99, "learning_rate": 1.727056942283367e-08, "loss": 1.2645, "step": 4693 }, { "epoch": 0.99, "learning_rate": 1.6709040836089262e-08, "loss": 1.2161, "step": 4694 }, { "epoch": 0.99, "learning_rate": 1.6156788585879325e-08, "loss": 1.2441, "step": 4695 }, { "epoch": 0.99, "learning_rate": 1.5613812928563233e-08, "loss": 1.2384, "step": 4696 }, { "epoch": 0.99, "learning_rate": 1.508011411619048e-08, "loss": 1.2884, "step": 4697 }, { "epoch": 0.99, "learning_rate": 1.4555692396509557e-08, "loss": 1.2231, "step": 4698 }, { "epoch": 0.99, "learning_rate": 1.4040548012956844e-08, "loss": 1.1984, "step": 4699 }, { "epoch": 0.99, "learning_rate": 1.3534681204665502e-08, "loss": 1.2186, "step": 4700 }, { "epoch": 0.99, "learning_rate": 1.3038092206461017e-08, "loss": 1.2536, "step": 4701 }, { "epoch": 0.99, "learning_rate": 1.2550781248863442e-08, "loss": 1.2366, "step": 4702 }, { "epoch": 0.99, "learning_rate": 1.2072748558082936e-08, "loss": 1.2491, "step": 4703 }, { "epoch": 0.99, "learning_rate": 1.1603994356026437e-08, "loss": 1.2115, "step": 4704 }, { "epoch": 0.99, "learning_rate": 1.1144518860290998e-08, "loss": 1.2219, "step": 4705 }, { "epoch": 0.99, "learning_rate": 1.0694322284166003e-08, "loss": 1.2137, "step": 4706 }, { "epoch": 0.99, "learning_rate": 1.0253404836637615e-08, "loss": 1.1968, "step": 4707 }, { "epoch": 0.99, "learning_rate": 9.821766722379888e-09, "loss": 1.2349, "step": 4708 }, { "epoch": 0.99, "learning_rate": 9.399408141761434e-09, "loss": 1.185, "step": 4709 }, { "epoch": 0.99, "learning_rate": 8.9863292908432e-09, "loss": 1.258, "step": 4710 }, { "epoch": 0.99, "learning_rate": 8.58253036137846e-09, "loss": 1.2634, "step": 4711 }, { "epoch": 0.99, "learning_rate": 8.188011540812834e-09, "loss": 1.2651, "step": 4712 }, { "epoch": 0.99, "learning_rate": 7.80277301228205e-09, "loss": 1.2436, "step": 4713 }, { "epoch": 0.99, "learning_rate": 7.426814954618611e-09, "loss": 1.2377, "step": 4714 }, { "epoch": 0.99, "learning_rate": 7.060137542340695e-09, "loss": 1.2328, "step": 4715 }, { "epoch": 0.99, "learning_rate": 6.702740945663256e-09, "loss": 1.1509, "step": 4716 }, { "epoch": 0.99, "learning_rate": 6.35462533049358e-09, "loss": 1.2448, "step": 4717 }, { "epoch": 0.99, "learning_rate": 6.0157908584246305e-09, "loss": 1.2704, "step": 4718 }, { "epoch": 0.99, "learning_rate": 5.686237686746143e-09, "loss": 1.2092, "step": 4719 }, { "epoch": 0.99, "learning_rate": 5.36596596844019e-09, "loss": 1.2177, "step": 4720 }, { "epoch": 0.99, "learning_rate": 5.054975852176736e-09, "loss": 1.2601, "step": 4721 }, { "epoch": 0.99, "learning_rate": 4.7532674823203e-09, "loss": 1.2601, "step": 4722 }, { "epoch": 0.99, "learning_rate": 4.4608409989232995e-09, "loss": 1.2555, "step": 4723 }, { "epoch": 0.99, "learning_rate": 4.17769653773048e-09, "loss": 1.2481, "step": 4724 }, { "epoch": 0.99, "learning_rate": 3.903834230183368e-09, "loss": 1.2643, "step": 4725 }, { "epoch": 0.99, "learning_rate": 3.639254203406939e-09, "loss": 1.2755, "step": 4726 }, { "epoch": 0.99, "learning_rate": 3.383956580218506e-09, "loss": 1.1891, "step": 4727 }, { "epoch": 0.99, "learning_rate": 3.1379414791343766e-09, "loss": 1.2494, "step": 4728 }, { "epoch": 0.99, "learning_rate": 2.9012090143498704e-09, "loss": 1.2551, "step": 4729 }, { "epoch": 0.99, "learning_rate": 2.6737592957615243e-09, "loss": 1.1741, "step": 4730 }, { "epoch": 1.0, "learning_rate": 2.4555924289493272e-09, "loss": 1.2429, "step": 4731 }, { "epoch": 1.0, "learning_rate": 2.2467085151900436e-09, "loss": 1.2265, "step": 4732 }, { "epoch": 1.0, "learning_rate": 2.047107651446112e-09, "loss": 1.2828, "step": 4733 }, { "epoch": 1.0, "learning_rate": 1.8567899303767457e-09, "loss": 1.2395, "step": 4734 }, { "epoch": 1.0, "learning_rate": 1.6757554403223907e-09, "loss": 1.2633, "step": 4735 }, { "epoch": 1.0, "learning_rate": 1.5040042653269304e-09, "loss": 1.2112, "step": 4736 }, { "epoch": 1.0, "learning_rate": 1.3415364851132595e-09, "loss": 1.2111, "step": 4737 }, { "epoch": 1.0, "learning_rate": 1.188352175103269e-09, "loss": 1.2249, "step": 4738 }, { "epoch": 1.0, "learning_rate": 1.0444514064023025e-09, "loss": 1.2669, "step": 4739 }, { "epoch": 1.0, "learning_rate": 9.098342458102593e-10, "loss": 1.2549, "step": 4740 }, { "epoch": 1.0, "learning_rate": 7.845007558193729e-10, "loss": 1.243, "step": 4741 }, { "epoch": 1.0, "learning_rate": 6.684509946075501e-10, "loss": 1.1938, "step": 4742 }, { "epoch": 1.0, "learning_rate": 5.616850160494736e-10, "loss": 1.2334, "step": 4743 }, { "epoch": 1.0, "learning_rate": 4.642028697010581e-10, "loss": 1.2111, "step": 4744 }, { "epoch": 1.0, "learning_rate": 3.760046008172147e-10, "loss": 1.2216, "step": 4745 }, { "epoch": 1.0, "learning_rate": 2.97090250340748e-10, "loss": 1.2322, "step": 4746 }, { "epoch": 1.0, "learning_rate": 2.2745985490235656e-10, "loss": 1.1637, "step": 4747 }, { "epoch": 1.0, "learning_rate": 1.6711344682507346e-10, "loss": 1.2659, "step": 4748 }, { "epoch": 1.0, "learning_rate": 1.160510541220461e-10, "loss": 1.2167, "step": 4749 }, { "epoch": 1.0, "learning_rate": 7.427270049653601e-11, "loss": 1.2163, "step": 4750 }, { "epoch": 1.0, "learning_rate": 4.1778405341919016e-11, "loss": 1.2346, "step": 4751 }, { "epoch": 1.0, "learning_rate": 1.8568183743905566e-11, "loss": 1.2219, "step": 4752 }, { "epoch": 1.0, "learning_rate": 4.6420464738794465e-12, "loss": 1.2163, "step": 4753 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 1.1564, "step": 4754 }, { "epoch": 1.0, "step": 4754, "total_flos": 1.1295276544628886e+23, "train_loss": 1.2829506933864647, "train_runtime": 87797.1342, "train_samples_per_second": 6.931, "train_steps_per_second": 0.054 } ], "logging_steps": 1.0, "max_steps": 4754, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5000, "total_flos": 1.1295276544628886e+23, "train_batch_size": 2, "trial_name": null, "trial_params": null }