diff --git "a/internvl2_8b_full_finetune/trainer_state.json" "b/internvl2_8b_full_finetune/trainer_state.json" new file mode 100644--- /dev/null +++ "b/internvl2_8b_full_finetune/trainer_state.json" @@ -0,0 +1,28554 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9999474154703686, + "eval_steps": 500, + "global_step": 4754, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.7972027972027973e-07, + "loss": 3.7565, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 5.594405594405595e-07, + "loss": 3.6338, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 8.391608391608393e-07, + "loss": 3.6721, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 1.118881118881119e-06, + "loss": 3.6397, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 1.3986013986013987e-06, + "loss": 3.7318, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1.6783216783216785e-06, + "loss": 3.3937, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 1.9580419580419583e-06, + "loss": 3.173, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 2.237762237762238e-06, + "loss": 3.0256, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 2.517482517482518e-06, + "loss": 2.2789, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 2.7972027972027974e-06, + "loss": 2.1252, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 3.0769230769230774e-06, + "loss": 1.8772, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 3.356643356643357e-06, + "loss": 1.5638, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 3.6363636363636366e-06, + "loss": 1.5352, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 3.916083916083917e-06, + "loss": 1.4646, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 4.195804195804197e-06, + "loss": 1.4345, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 4.475524475524476e-06, + "loss": 1.4364, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 4.755244755244756e-06, + "loss": 1.4582, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 5.034965034965036e-06, + "loss": 1.4391, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 5.314685314685315e-06, + "loss": 1.4497, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 5.594405594405595e-06, + "loss": 1.45, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 5.874125874125874e-06, + "loss": 1.4419, + "step": 21 + }, + { + "epoch": 0.0, + "learning_rate": 6.153846153846155e-06, + "loss": 1.4565, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 6.433566433566434e-06, + "loss": 1.4548, + "step": 23 + }, + { + "epoch": 0.01, + "learning_rate": 6.713286713286714e-06, + "loss": 1.3943, + "step": 24 + }, + { + "epoch": 0.01, + "learning_rate": 6.993006993006993e-06, + "loss": 1.4182, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 7.272727272727273e-06, + "loss": 1.4243, + "step": 26 + }, + { + "epoch": 0.01, + "learning_rate": 7.552447552447552e-06, + "loss": 1.4116, + "step": 27 + }, + { + "epoch": 0.01, + "learning_rate": 7.832167832167833e-06, + "loss": 1.4093, + "step": 28 + }, + { + "epoch": 0.01, + "learning_rate": 8.111888111888112e-06, + "loss": 1.4619, + "step": 29 + }, + { + "epoch": 0.01, + "learning_rate": 8.391608391608393e-06, + "loss": 1.4145, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 8.671328671328672e-06, + "loss": 1.3979, + "step": 31 + }, + { + "epoch": 0.01, + "learning_rate": 8.951048951048951e-06, + "loss": 1.4465, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 9.230769230769232e-06, + "loss": 1.3406, + "step": 33 + }, + { + "epoch": 0.01, + "learning_rate": 9.510489510489511e-06, + "loss": 1.4325, + "step": 34 + }, + { + "epoch": 0.01, + "learning_rate": 9.79020979020979e-06, + "loss": 1.4771, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 1.0069930069930071e-05, + "loss": 1.3757, + "step": 36 + }, + { + "epoch": 0.01, + "learning_rate": 1.034965034965035e-05, + "loss": 1.4177, + "step": 37 + }, + { + "epoch": 0.01, + "learning_rate": 1.062937062937063e-05, + "loss": 1.3953, + "step": 38 + }, + { + "epoch": 0.01, + "learning_rate": 1.0909090909090909e-05, + "loss": 1.3675, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 1.118881118881119e-05, + "loss": 1.3516, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 1.1468531468531469e-05, + "loss": 1.3917, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 1.1748251748251748e-05, + "loss": 1.337, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 1.2027972027972027e-05, + "loss": 1.3242, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 1.230769230769231e-05, + "loss": 1.3318, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 1.2587412587412589e-05, + "loss": 1.4071, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 1.2867132867132868e-05, + "loss": 1.392, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 1.3146853146853147e-05, + "loss": 1.4016, + "step": 47 + }, + { + "epoch": 0.01, + "learning_rate": 1.3426573426573428e-05, + "loss": 1.3953, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 1.3706293706293707e-05, + "loss": 1.3797, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 1.3986013986013986e-05, + "loss": 1.3455, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 1.4265734265734267e-05, + "loss": 1.4462, + "step": 51 + }, + { + "epoch": 0.01, + "learning_rate": 1.4545454545454546e-05, + "loss": 1.3536, + "step": 52 + }, + { + "epoch": 0.01, + "learning_rate": 1.4825174825174825e-05, + "loss": 1.4016, + "step": 53 + }, + { + "epoch": 0.01, + "learning_rate": 1.5104895104895105e-05, + "loss": 1.441, + "step": 54 + }, + { + "epoch": 0.01, + "learning_rate": 1.5384615384615387e-05, + "loss": 1.4135, + "step": 55 + }, + { + "epoch": 0.01, + "learning_rate": 1.5664335664335666e-05, + "loss": 1.4371, + "step": 56 + }, + { + "epoch": 0.01, + "learning_rate": 1.5944055944055945e-05, + "loss": 1.4199, + "step": 57 + }, + { + "epoch": 0.01, + "learning_rate": 1.6223776223776225e-05, + "loss": 1.373, + "step": 58 + }, + { + "epoch": 0.01, + "learning_rate": 1.6503496503496507e-05, + "loss": 1.4099, + "step": 59 + }, + { + "epoch": 0.01, + "learning_rate": 1.6783216783216786e-05, + "loss": 1.3375, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 1.7062937062937065e-05, + "loss": 1.3789, + "step": 61 + }, + { + "epoch": 0.01, + "learning_rate": 1.7342657342657345e-05, + "loss": 1.4144, + "step": 62 + }, + { + "epoch": 0.01, + "learning_rate": 1.7622377622377624e-05, + "loss": 1.3711, + "step": 63 + }, + { + "epoch": 0.01, + "learning_rate": 1.7902097902097903e-05, + "loss": 1.4263, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 1.8181818181818182e-05, + "loss": 1.3606, + "step": 65 + }, + { + "epoch": 0.01, + "learning_rate": 1.8461538461538465e-05, + "loss": 1.3801, + "step": 66 + }, + { + "epoch": 0.01, + "learning_rate": 1.8741258741258744e-05, + "loss": 1.414, + "step": 67 + }, + { + "epoch": 0.01, + "learning_rate": 1.9020979020979023e-05, + "loss": 1.3674, + "step": 68 + }, + { + "epoch": 0.01, + "learning_rate": 1.9300699300699302e-05, + "loss": 1.4363, + "step": 69 + }, + { + "epoch": 0.01, + "learning_rate": 1.958041958041958e-05, + "loss": 1.3421, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 1.986013986013986e-05, + "loss": 1.4186, + "step": 71 + }, + { + "epoch": 0.02, + "learning_rate": 2.0139860139860143e-05, + "loss": 1.7448, + "step": 72 + }, + { + "epoch": 0.02, + "learning_rate": 2.0419580419580422e-05, + "loss": 1.3815, + "step": 73 + }, + { + "epoch": 0.02, + "learning_rate": 2.06993006993007e-05, + "loss": 1.3493, + "step": 74 + }, + { + "epoch": 0.02, + "learning_rate": 2.097902097902098e-05, + "loss": 1.3953, + "step": 75 + }, + { + "epoch": 0.02, + "learning_rate": 2.125874125874126e-05, + "loss": 1.4071, + "step": 76 + }, + { + "epoch": 0.02, + "learning_rate": 2.153846153846154e-05, + "loss": 1.3465, + "step": 77 + }, + { + "epoch": 0.02, + "learning_rate": 2.1818181818181818e-05, + "loss": 1.4073, + "step": 78 + }, + { + "epoch": 0.02, + "learning_rate": 2.2097902097902097e-05, + "loss": 1.4047, + "step": 79 + }, + { + "epoch": 0.02, + "learning_rate": 2.237762237762238e-05, + "loss": 1.3887, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 2.265734265734266e-05, + "loss": 1.3287, + "step": 81 + }, + { + "epoch": 0.02, + "learning_rate": 2.2937062937062938e-05, + "loss": 1.3959, + "step": 82 + }, + { + "epoch": 0.02, + "learning_rate": 2.3216783216783217e-05, + "loss": 1.3785, + "step": 83 + }, + { + "epoch": 0.02, + "learning_rate": 2.3496503496503496e-05, + "loss": 1.4571, + "step": 84 + }, + { + "epoch": 0.02, + "learning_rate": 2.3776223776223775e-05, + "loss": 1.3935, + "step": 85 + }, + { + "epoch": 0.02, + "learning_rate": 2.4055944055944054e-05, + "loss": 1.4453, + "step": 86 + }, + { + "epoch": 0.02, + "learning_rate": 2.433566433566434e-05, + "loss": 1.3951, + "step": 87 + }, + { + "epoch": 0.02, + "learning_rate": 2.461538461538462e-05, + "loss": 1.3483, + "step": 88 + }, + { + "epoch": 0.02, + "learning_rate": 2.48951048951049e-05, + "loss": 1.3968, + "step": 89 + }, + { + "epoch": 0.02, + "learning_rate": 2.5174825174825178e-05, + "loss": 1.3972, + "step": 90 + }, + { + "epoch": 0.02, + "learning_rate": 2.5454545454545457e-05, + "loss": 1.4067, + "step": 91 + }, + { + "epoch": 0.02, + "learning_rate": 2.5734265734265736e-05, + "loss": 1.428, + "step": 92 + }, + { + "epoch": 0.02, + "learning_rate": 2.6013986013986015e-05, + "loss": 1.4095, + "step": 93 + }, + { + "epoch": 0.02, + "learning_rate": 2.6293706293706294e-05, + "loss": 1.4353, + "step": 94 + }, + { + "epoch": 0.02, + "learning_rate": 2.6573426573426577e-05, + "loss": 1.2736, + "step": 95 + }, + { + "epoch": 0.02, + "learning_rate": 2.6853146853146856e-05, + "loss": 1.4096, + "step": 96 + }, + { + "epoch": 0.02, + "learning_rate": 2.7132867132867135e-05, + "loss": 1.3583, + "step": 97 + }, + { + "epoch": 0.02, + "learning_rate": 2.7412587412587414e-05, + "loss": 1.3911, + "step": 98 + }, + { + "epoch": 0.02, + "learning_rate": 2.7692307692307694e-05, + "loss": 1.3625, + "step": 99 + }, + { + "epoch": 0.02, + "learning_rate": 2.7972027972027973e-05, + "loss": 1.3575, + "step": 100 + }, + { + "epoch": 0.02, + "learning_rate": 2.8251748251748252e-05, + "loss": 1.2927, + "step": 101 + }, + { + "epoch": 0.02, + "learning_rate": 2.8531468531468534e-05, + "loss": 1.3838, + "step": 102 + }, + { + "epoch": 0.02, + "learning_rate": 2.8811188811188814e-05, + "loss": 1.3662, + "step": 103 + }, + { + "epoch": 0.02, + "learning_rate": 2.9090909090909093e-05, + "loss": 1.3649, + "step": 104 + }, + { + "epoch": 0.02, + "learning_rate": 2.9370629370629372e-05, + "loss": 1.3942, + "step": 105 + }, + { + "epoch": 0.02, + "learning_rate": 2.965034965034965e-05, + "loss": 1.4093, + "step": 106 + }, + { + "epoch": 0.02, + "learning_rate": 2.993006993006993e-05, + "loss": 1.3986, + "step": 107 + }, + { + "epoch": 0.02, + "learning_rate": 3.020979020979021e-05, + "loss": 1.3586, + "step": 108 + }, + { + "epoch": 0.02, + "learning_rate": 3.048951048951049e-05, + "loss": 1.4076, + "step": 109 + }, + { + "epoch": 0.02, + "learning_rate": 3.0769230769230774e-05, + "loss": 1.3857, + "step": 110 + }, + { + "epoch": 0.02, + "learning_rate": 3.104895104895105e-05, + "loss": 1.3899, + "step": 111 + }, + { + "epoch": 0.02, + "learning_rate": 3.132867132867133e-05, + "loss": 1.3579, + "step": 112 + }, + { + "epoch": 0.02, + "learning_rate": 3.160839160839161e-05, + "loss": 1.3288, + "step": 113 + }, + { + "epoch": 0.02, + "learning_rate": 3.188811188811189e-05, + "loss": 1.3219, + "step": 114 + }, + { + "epoch": 0.02, + "learning_rate": 3.216783216783217e-05, + "loss": 1.4371, + "step": 115 + }, + { + "epoch": 0.02, + "learning_rate": 3.244755244755245e-05, + "loss": 1.4103, + "step": 116 + }, + { + "epoch": 0.02, + "learning_rate": 3.272727272727273e-05, + "loss": 1.3679, + "step": 117 + }, + { + "epoch": 0.02, + "learning_rate": 3.3006993006993014e-05, + "loss": 1.4001, + "step": 118 + }, + { + "epoch": 0.03, + "learning_rate": 3.328671328671329e-05, + "loss": 1.3716, + "step": 119 + }, + { + "epoch": 0.03, + "learning_rate": 3.356643356643357e-05, + "loss": 1.4149, + "step": 120 + }, + { + "epoch": 0.03, + "learning_rate": 3.384615384615385e-05, + "loss": 1.3703, + "step": 121 + }, + { + "epoch": 0.03, + "learning_rate": 3.412587412587413e-05, + "loss": 1.4115, + "step": 122 + }, + { + "epoch": 0.03, + "learning_rate": 3.440559440559441e-05, + "loss": 1.4308, + "step": 123 + }, + { + "epoch": 0.03, + "learning_rate": 3.468531468531469e-05, + "loss": 1.4214, + "step": 124 + }, + { + "epoch": 0.03, + "learning_rate": 3.496503496503497e-05, + "loss": 1.3922, + "step": 125 + }, + { + "epoch": 0.03, + "learning_rate": 3.524475524475525e-05, + "loss": 2.2952, + "step": 126 + }, + { + "epoch": 0.03, + "learning_rate": 3.552447552447553e-05, + "loss": 1.4082, + "step": 127 + }, + { + "epoch": 0.03, + "learning_rate": 3.5804195804195806e-05, + "loss": 1.4501, + "step": 128 + }, + { + "epoch": 0.03, + "learning_rate": 3.608391608391609e-05, + "loss": 1.4269, + "step": 129 + }, + { + "epoch": 0.03, + "learning_rate": 3.6363636363636364e-05, + "loss": 1.3986, + "step": 130 + }, + { + "epoch": 0.03, + "learning_rate": 3.664335664335665e-05, + "loss": 1.429, + "step": 131 + }, + { + "epoch": 0.03, + "learning_rate": 3.692307692307693e-05, + "loss": 1.3727, + "step": 132 + }, + { + "epoch": 0.03, + "learning_rate": 3.7202797202797205e-05, + "loss": 1.3966, + "step": 133 + }, + { + "epoch": 0.03, + "learning_rate": 3.748251748251749e-05, + "loss": 1.4479, + "step": 134 + }, + { + "epoch": 0.03, + "learning_rate": 3.776223776223776e-05, + "loss": 1.2996, + "step": 135 + }, + { + "epoch": 0.03, + "learning_rate": 3.8041958041958046e-05, + "loss": 1.3981, + "step": 136 + }, + { + "epoch": 0.03, + "learning_rate": 3.832167832167832e-05, + "loss": 1.4238, + "step": 137 + }, + { + "epoch": 0.03, + "learning_rate": 3.8601398601398604e-05, + "loss": 1.3833, + "step": 138 + }, + { + "epoch": 0.03, + "learning_rate": 3.888111888111888e-05, + "loss": 1.4186, + "step": 139 + }, + { + "epoch": 0.03, + "learning_rate": 3.916083916083916e-05, + "loss": 1.3688, + "step": 140 + }, + { + "epoch": 0.03, + "learning_rate": 3.9440559440559445e-05, + "loss": 1.3687, + "step": 141 + }, + { + "epoch": 0.03, + "learning_rate": 3.972027972027972e-05, + "loss": 1.3736, + "step": 142 + }, + { + "epoch": 0.03, + "learning_rate": 4e-05, + "loss": 1.3654, + "step": 143 + }, + { + "epoch": 0.03, + "learning_rate": 3.999999535795353e-05, + "loss": 1.3127, + "step": 144 + }, + { + "epoch": 0.03, + "learning_rate": 3.9999981431816256e-05, + "loss": 1.3956, + "step": 145 + }, + { + "epoch": 0.03, + "learning_rate": 3.999995822159466e-05, + "loss": 1.3325, + "step": 146 + }, + { + "epoch": 0.03, + "learning_rate": 3.9999925727299505e-05, + "loss": 1.3311, + "step": 147 + }, + { + "epoch": 0.03, + "learning_rate": 3.999988394894588e-05, + "loss": 1.371, + "step": 148 + }, + { + "epoch": 0.03, + "learning_rate": 3.999983288655318e-05, + "loss": 1.3063, + "step": 149 + }, + { + "epoch": 0.03, + "learning_rate": 3.9999772540145104e-05, + "loss": 1.4212, + "step": 150 + }, + { + "epoch": 0.03, + "learning_rate": 3.999970290974967e-05, + "loss": 1.3853, + "step": 151 + }, + { + "epoch": 0.03, + "learning_rate": 3.999962399539919e-05, + "loss": 1.3556, + "step": 152 + }, + { + "epoch": 0.03, + "learning_rate": 3.9999535797130304e-05, + "loss": 1.3849, + "step": 153 + }, + { + "epoch": 0.03, + "learning_rate": 3.999943831498395e-05, + "loss": 1.3933, + "step": 154 + }, + { + "epoch": 0.03, + "learning_rate": 3.9999331549005394e-05, + "loss": 1.3331, + "step": 155 + }, + { + "epoch": 0.03, + "learning_rate": 3.999921549924418e-05, + "loss": 1.3488, + "step": 156 + }, + { + "epoch": 0.03, + "learning_rate": 3.999909016575419e-05, + "loss": 1.4472, + "step": 157 + }, + { + "epoch": 0.03, + "learning_rate": 3.99989555485936e-05, + "loss": 1.4994, + "step": 158 + }, + { + "epoch": 0.03, + "learning_rate": 3.99988116478249e-05, + "loss": 1.4134, + "step": 159 + }, + { + "epoch": 0.03, + "learning_rate": 3.999865846351489e-05, + "loss": 1.3904, + "step": 160 + }, + { + "epoch": 0.03, + "learning_rate": 3.9998495995734677e-05, + "loss": 1.3941, + "step": 161 + }, + { + "epoch": 0.03, + "learning_rate": 3.999832424455968e-05, + "loss": 1.3369, + "step": 162 + }, + { + "epoch": 0.03, + "learning_rate": 3.999814321006963e-05, + "loss": 1.3989, + "step": 163 + }, + { + "epoch": 0.03, + "learning_rate": 3.999795289234856e-05, + "loss": 1.3794, + "step": 164 + }, + { + "epoch": 0.03, + "learning_rate": 3.9997753291484816e-05, + "loss": 1.3702, + "step": 165 + }, + { + "epoch": 0.03, + "learning_rate": 3.999754440757105e-05, + "loss": 1.322, + "step": 166 + }, + { + "epoch": 0.04, + "learning_rate": 3.999732624070424e-05, + "loss": 1.4168, + "step": 167 + }, + { + "epoch": 0.04, + "learning_rate": 3.999709879098565e-05, + "loss": 1.3924, + "step": 168 + }, + { + "epoch": 0.04, + "learning_rate": 3.999686205852087e-05, + "loss": 1.3191, + "step": 169 + }, + { + "epoch": 0.04, + "learning_rate": 3.999661604341978e-05, + "loss": 1.398, + "step": 170 + }, + { + "epoch": 0.04, + "learning_rate": 3.99963607457966e-05, + "loss": 1.3207, + "step": 171 + }, + { + "epoch": 0.04, + "learning_rate": 3.999609616576982e-05, + "loss": 1.3763, + "step": 172 + }, + { + "epoch": 0.04, + "learning_rate": 3.9995822303462273e-05, + "loss": 1.3931, + "step": 173 + }, + { + "epoch": 0.04, + "learning_rate": 3.9995539159001074e-05, + "loss": 1.382, + "step": 174 + }, + { + "epoch": 0.04, + "learning_rate": 3.999524673251768e-05, + "loss": 1.4525, + "step": 175 + }, + { + "epoch": 0.04, + "learning_rate": 3.999494502414783e-05, + "loss": 1.4071, + "step": 176 + }, + { + "epoch": 0.04, + "learning_rate": 3.999463403403156e-05, + "loss": 1.4079, + "step": 177 + }, + { + "epoch": 0.04, + "learning_rate": 3.999431376231326e-05, + "loss": 1.44, + "step": 178 + }, + { + "epoch": 0.04, + "learning_rate": 3.9993984209141576e-05, + "loss": 1.4475, + "step": 179 + }, + { + "epoch": 0.04, + "learning_rate": 3.999364537466951e-05, + "loss": 1.4105, + "step": 180 + }, + { + "epoch": 0.04, + "learning_rate": 3.999329725905434e-05, + "loss": 1.3631, + "step": 181 + }, + { + "epoch": 0.04, + "learning_rate": 3.999293986245766e-05, + "loss": 1.3998, + "step": 182 + }, + { + "epoch": 0.04, + "learning_rate": 3.9992573185045386e-05, + "loss": 1.3647, + "step": 183 + }, + { + "epoch": 0.04, + "learning_rate": 3.9992197226987725e-05, + "loss": 1.4261, + "step": 184 + }, + { + "epoch": 0.04, + "learning_rate": 3.999181198845919e-05, + "loss": 1.3835, + "step": 185 + }, + { + "epoch": 0.04, + "learning_rate": 3.999141746963862e-05, + "loss": 1.3491, + "step": 186 + }, + { + "epoch": 0.04, + "learning_rate": 3.999101367070916e-05, + "loss": 1.3868, + "step": 187 + }, + { + "epoch": 0.04, + "learning_rate": 3.9990600591858244e-05, + "loss": 1.3502, + "step": 188 + }, + { + "epoch": 0.04, + "learning_rate": 3.999017823327762e-05, + "loss": 1.3767, + "step": 189 + }, + { + "epoch": 0.04, + "learning_rate": 3.9989746595163364e-05, + "loss": 1.4154, + "step": 190 + }, + { + "epoch": 0.04, + "learning_rate": 3.998930567771583e-05, + "loss": 1.3643, + "step": 191 + }, + { + "epoch": 0.04, + "learning_rate": 3.998885548113971e-05, + "loss": 1.427, + "step": 192 + }, + { + "epoch": 0.04, + "learning_rate": 3.998839600564398e-05, + "loss": 1.3278, + "step": 193 + }, + { + "epoch": 0.04, + "learning_rate": 3.998792725144192e-05, + "loss": 1.364, + "step": 194 + }, + { + "epoch": 0.04, + "learning_rate": 3.9987449218751134e-05, + "loss": 1.3331, + "step": 195 + }, + { + "epoch": 0.04, + "learning_rate": 3.998696190779354e-05, + "loss": 1.3721, + "step": 196 + }, + { + "epoch": 0.04, + "learning_rate": 3.9986465318795336e-05, + "loss": 1.4242, + "step": 197 + }, + { + "epoch": 0.04, + "learning_rate": 3.998595945198705e-05, + "loss": 1.3941, + "step": 198 + }, + { + "epoch": 0.04, + "learning_rate": 3.9985444307603497e-05, + "loss": 1.398, + "step": 199 + }, + { + "epoch": 0.04, + "learning_rate": 3.998491988588381e-05, + "loss": 1.3835, + "step": 200 + }, + { + "epoch": 0.04, + "learning_rate": 3.998438618707144e-05, + "loss": 1.383, + "step": 201 + }, + { + "epoch": 0.04, + "learning_rate": 3.9983843211414124e-05, + "loss": 1.4021, + "step": 202 + }, + { + "epoch": 0.04, + "learning_rate": 3.9983290959163914e-05, + "loss": 1.3831, + "step": 203 + }, + { + "epoch": 0.04, + "learning_rate": 3.998272943057717e-05, + "loss": 1.3201, + "step": 204 + }, + { + "epoch": 0.04, + "learning_rate": 3.998215862591455e-05, + "loss": 1.3822, + "step": 205 + }, + { + "epoch": 0.04, + "learning_rate": 3.998157854544104e-05, + "loss": 1.3514, + "step": 206 + }, + { + "epoch": 0.04, + "learning_rate": 3.99809891894259e-05, + "loss": 1.3992, + "step": 207 + }, + { + "epoch": 0.04, + "learning_rate": 3.998039055814272e-05, + "loss": 1.3909, + "step": 208 + }, + { + "epoch": 0.04, + "learning_rate": 3.9979782651869384e-05, + "loss": 1.3097, + "step": 209 + }, + { + "epoch": 0.04, + "learning_rate": 3.997916547088808e-05, + "loss": 1.3593, + "step": 210 + }, + { + "epoch": 0.04, + "learning_rate": 3.997853901548532e-05, + "loss": 1.3284, + "step": 211 + }, + { + "epoch": 0.04, + "learning_rate": 3.9977903285951896e-05, + "loss": 1.3946, + "step": 212 + }, + { + "epoch": 0.04, + "learning_rate": 3.9977258282582916e-05, + "loss": 1.3739, + "step": 213 + }, + { + "epoch": 0.05, + "learning_rate": 3.99766040056778e-05, + "loss": 1.371, + "step": 214 + }, + { + "epoch": 0.05, + "learning_rate": 3.997594045554027e-05, + "loss": 1.3733, + "step": 215 + }, + { + "epoch": 0.05, + "learning_rate": 3.9975267632478336e-05, + "loss": 1.3776, + "step": 216 + }, + { + "epoch": 0.05, + "learning_rate": 3.997458553680434e-05, + "loss": 1.3519, + "step": 217 + }, + { + "epoch": 0.05, + "learning_rate": 3.99738941688349e-05, + "loss": 1.3254, + "step": 218 + }, + { + "epoch": 0.05, + "learning_rate": 3.997319352889096e-05, + "loss": 1.3786, + "step": 219 + }, + { + "epoch": 0.05, + "learning_rate": 3.997248361729777e-05, + "loss": 1.3308, + "step": 220 + }, + { + "epoch": 0.05, + "learning_rate": 3.997176443438485e-05, + "loss": 1.3666, + "step": 221 + }, + { + "epoch": 0.05, + "learning_rate": 3.997103598048607e-05, + "loss": 1.3565, + "step": 222 + }, + { + "epoch": 0.05, + "learning_rate": 3.9970298255939564e-05, + "loss": 1.342, + "step": 223 + }, + { + "epoch": 0.05, + "learning_rate": 3.9969551261087806e-05, + "loss": 1.4034, + "step": 224 + }, + { + "epoch": 0.05, + "learning_rate": 3.996879499627754e-05, + "loss": 1.3279, + "step": 225 + }, + { + "epoch": 0.05, + "learning_rate": 3.996802946185984e-05, + "loss": 1.3397, + "step": 226 + }, + { + "epoch": 0.05, + "learning_rate": 3.9967254658190055e-05, + "loss": 1.4121, + "step": 227 + }, + { + "epoch": 0.05, + "learning_rate": 3.996647058562786e-05, + "loss": 1.3868, + "step": 228 + }, + { + "epoch": 0.05, + "learning_rate": 3.9965677244537226e-05, + "loss": 1.2964, + "step": 229 + }, + { + "epoch": 0.05, + "learning_rate": 3.9964874635286436e-05, + "loss": 1.3671, + "step": 230 + }, + { + "epoch": 0.05, + "learning_rate": 3.996406275824804e-05, + "loss": 1.3471, + "step": 231 + }, + { + "epoch": 0.05, + "learning_rate": 3.996324161379894e-05, + "loss": 1.309, + "step": 232 + }, + { + "epoch": 0.05, + "learning_rate": 3.9962411202320296e-05, + "loss": 1.382, + "step": 233 + }, + { + "epoch": 0.05, + "learning_rate": 3.99615715241976e-05, + "loss": 1.3367, + "step": 234 + }, + { + "epoch": 0.05, + "learning_rate": 3.996072257982064e-05, + "loss": 1.3915, + "step": 235 + }, + { + "epoch": 0.05, + "learning_rate": 3.9959864369583485e-05, + "loss": 1.423, + "step": 236 + }, + { + "epoch": 0.05, + "learning_rate": 3.9958996893884525e-05, + "loss": 1.3617, + "step": 237 + }, + { + "epoch": 0.05, + "learning_rate": 3.9958120153126454e-05, + "loss": 1.3036, + "step": 238 + }, + { + "epoch": 0.05, + "learning_rate": 3.995723414771625e-05, + "loss": 1.3381, + "step": 239 + }, + { + "epoch": 0.05, + "learning_rate": 3.9956338878065205e-05, + "loss": 1.3624, + "step": 240 + }, + { + "epoch": 0.05, + "learning_rate": 3.99554343445889e-05, + "loss": 1.4119, + "step": 241 + }, + { + "epoch": 0.05, + "learning_rate": 3.995452054770724e-05, + "loss": 1.391, + "step": 242 + }, + { + "epoch": 0.05, + "learning_rate": 3.99535974878444e-05, + "loss": 1.3295, + "step": 243 + }, + { + "epoch": 0.05, + "learning_rate": 3.995266516542887e-05, + "loss": 1.3407, + "step": 244 + }, + { + "epoch": 0.05, + "learning_rate": 3.995172358089344e-05, + "loss": 1.3814, + "step": 245 + }, + { + "epoch": 0.05, + "learning_rate": 3.995077273467521e-05, + "loss": 1.3753, + "step": 246 + }, + { + "epoch": 0.05, + "learning_rate": 3.994981262721555e-05, + "loss": 1.337, + "step": 247 + }, + { + "epoch": 0.05, + "learning_rate": 3.9948843258960154e-05, + "loss": 1.3938, + "step": 248 + }, + { + "epoch": 0.05, + "learning_rate": 3.9947864630359005e-05, + "loss": 1.3571, + "step": 249 + }, + { + "epoch": 0.05, + "learning_rate": 3.994687674186638e-05, + "loss": 1.3662, + "step": 250 + }, + { + "epoch": 0.05, + "learning_rate": 3.9945879593940874e-05, + "loss": 1.3636, + "step": 251 + }, + { + "epoch": 0.05, + "learning_rate": 3.994487318704536e-05, + "loss": 1.3692, + "step": 252 + }, + { + "epoch": 0.05, + "learning_rate": 3.994385752164703e-05, + "loss": 1.413, + "step": 253 + }, + { + "epoch": 0.05, + "learning_rate": 3.9942832598217345e-05, + "loss": 1.4184, + "step": 254 + }, + { + "epoch": 0.05, + "learning_rate": 3.9941798417232084e-05, + "loss": 1.3372, + "step": 255 + }, + { + "epoch": 0.05, + "learning_rate": 3.9940754979171317e-05, + "loss": 1.3661, + "step": 256 + }, + { + "epoch": 0.05, + "learning_rate": 3.9939702284519416e-05, + "loss": 1.393, + "step": 257 + }, + { + "epoch": 0.05, + "learning_rate": 3.9938640333765046e-05, + "loss": 1.4067, + "step": 258 + }, + { + "epoch": 0.05, + "learning_rate": 3.993756912740117e-05, + "loss": 1.358, + "step": 259 + }, + { + "epoch": 0.05, + "learning_rate": 3.9936488665925045e-05, + "loss": 1.3745, + "step": 260 + }, + { + "epoch": 0.05, + "learning_rate": 3.993539894983823e-05, + "loss": 1.3749, + "step": 261 + }, + { + "epoch": 0.06, + "learning_rate": 3.993429997964657e-05, + "loss": 1.3752, + "step": 262 + }, + { + "epoch": 0.06, + "learning_rate": 3.993319175586021e-05, + "loss": 1.3392, + "step": 263 + }, + { + "epoch": 0.06, + "learning_rate": 3.9932074278993604e-05, + "loss": 1.4217, + "step": 264 + }, + { + "epoch": 0.06, + "learning_rate": 3.993094754956549e-05, + "loss": 1.4214, + "step": 265 + }, + { + "epoch": 0.06, + "learning_rate": 3.992981156809889e-05, + "loss": 1.3625, + "step": 266 + }, + { + "epoch": 0.06, + "learning_rate": 3.9928666335121135e-05, + "loss": 1.4306, + "step": 267 + }, + { + "epoch": 0.06, + "learning_rate": 3.992751185116385e-05, + "loss": 1.3936, + "step": 268 + }, + { + "epoch": 0.06, + "learning_rate": 3.992634811676296e-05, + "loss": 1.3801, + "step": 269 + }, + { + "epoch": 0.06, + "learning_rate": 3.992517513245865e-05, + "loss": 1.3638, + "step": 270 + }, + { + "epoch": 0.06, + "learning_rate": 3.992399289879546e-05, + "loss": 1.4009, + "step": 271 + }, + { + "epoch": 0.06, + "learning_rate": 3.992280141632216e-05, + "loss": 1.3206, + "step": 272 + }, + { + "epoch": 0.06, + "learning_rate": 3.9921600685591856e-05, + "loss": 1.3925, + "step": 273 + }, + { + "epoch": 0.06, + "learning_rate": 3.9920390707161927e-05, + "loss": 1.4102, + "step": 274 + }, + { + "epoch": 0.06, + "learning_rate": 3.9919171481594056e-05, + "loss": 1.348, + "step": 275 + }, + { + "epoch": 0.06, + "learning_rate": 3.9917943009454206e-05, + "loss": 1.3055, + "step": 276 + }, + { + "epoch": 0.06, + "learning_rate": 3.9916705291312646e-05, + "loss": 1.3889, + "step": 277 + }, + { + "epoch": 0.06, + "learning_rate": 3.991545832774393e-05, + "loss": 1.3769, + "step": 278 + }, + { + "epoch": 0.06, + "learning_rate": 3.9914202119326895e-05, + "loss": 1.3341, + "step": 279 + }, + { + "epoch": 0.06, + "learning_rate": 3.991293666664469e-05, + "loss": 1.2588, + "step": 280 + }, + { + "epoch": 0.06, + "learning_rate": 3.991166197028474e-05, + "loss": 1.3687, + "step": 281 + }, + { + "epoch": 0.06, + "learning_rate": 3.9910378030838765e-05, + "loss": 1.3817, + "step": 282 + }, + { + "epoch": 0.06, + "learning_rate": 3.990908484890277e-05, + "loss": 1.3244, + "step": 283 + }, + { + "epoch": 0.06, + "learning_rate": 3.990778242507707e-05, + "loss": 1.3482, + "step": 284 + }, + { + "epoch": 0.06, + "learning_rate": 3.990647075996624e-05, + "loss": 1.3568, + "step": 285 + }, + { + "epoch": 0.06, + "learning_rate": 3.9905149854179174e-05, + "loss": 1.3464, + "step": 286 + }, + { + "epoch": 0.06, + "learning_rate": 3.990381970832903e-05, + "loss": 1.3846, + "step": 287 + }, + { + "epoch": 0.06, + "learning_rate": 3.9902480323033285e-05, + "loss": 1.3303, + "step": 288 + }, + { + "epoch": 0.06, + "learning_rate": 3.990113169891367e-05, + "loss": 1.3757, + "step": 289 + }, + { + "epoch": 0.06, + "learning_rate": 3.989977383659624e-05, + "loss": 1.3703, + "step": 290 + }, + { + "epoch": 0.06, + "learning_rate": 3.989840673671131e-05, + "loss": 1.3292, + "step": 291 + }, + { + "epoch": 0.06, + "learning_rate": 3.989703039989349e-05, + "loss": 1.3497, + "step": 292 + }, + { + "epoch": 0.06, + "learning_rate": 3.989564482678168e-05, + "loss": 1.3751, + "step": 293 + }, + { + "epoch": 0.06, + "learning_rate": 3.989425001801909e-05, + "loss": 1.384, + "step": 294 + }, + { + "epoch": 0.06, + "learning_rate": 3.9892845974253184e-05, + "loss": 1.3478, + "step": 295 + }, + { + "epoch": 0.06, + "learning_rate": 3.989143269613572e-05, + "loss": 1.3822, + "step": 296 + }, + { + "epoch": 0.06, + "learning_rate": 3.989001018432276e-05, + "loss": 1.3131, + "step": 297 + }, + { + "epoch": 0.06, + "learning_rate": 3.988857843947463e-05, + "loss": 1.3686, + "step": 298 + }, + { + "epoch": 0.06, + "learning_rate": 3.988713746225596e-05, + "loss": 1.4093, + "step": 299 + }, + { + "epoch": 0.06, + "learning_rate": 3.988568725333565e-05, + "loss": 1.4109, + "step": 300 + }, + { + "epoch": 0.06, + "learning_rate": 3.98842278133869e-05, + "loss": 1.3869, + "step": 301 + }, + { + "epoch": 0.06, + "learning_rate": 3.9882759143087194e-05, + "loss": 1.3923, + "step": 302 + }, + { + "epoch": 0.06, + "learning_rate": 3.9881281243118285e-05, + "loss": 1.3905, + "step": 303 + }, + { + "epoch": 0.06, + "learning_rate": 3.987979411416623e-05, + "loss": 1.3664, + "step": 304 + }, + { + "epoch": 0.06, + "learning_rate": 3.987829775692135e-05, + "loss": 1.3708, + "step": 305 + }, + { + "epoch": 0.06, + "learning_rate": 3.987679217207827e-05, + "loss": 1.2989, + "step": 306 + }, + { + "epoch": 0.06, + "learning_rate": 3.987527736033589e-05, + "loss": 1.3313, + "step": 307 + }, + { + "epoch": 0.06, + "learning_rate": 3.987375332239739e-05, + "loss": 1.342, + "step": 308 + }, + { + "epoch": 0.06, + "learning_rate": 3.9872220058970226e-05, + "loss": 1.3617, + "step": 309 + }, + { + "epoch": 0.07, + "learning_rate": 3.9870677570766167e-05, + "loss": 1.3414, + "step": 310 + }, + { + "epoch": 0.07, + "learning_rate": 3.986912585850123e-05, + "loss": 1.346, + "step": 311 + }, + { + "epoch": 0.07, + "learning_rate": 3.9867564922895724e-05, + "loss": 1.3326, + "step": 312 + }, + { + "epoch": 0.07, + "learning_rate": 3.986599476467425e-05, + "loss": 1.3472, + "step": 313 + }, + { + "epoch": 0.07, + "learning_rate": 3.9864415384565675e-05, + "loss": 1.3789, + "step": 314 + }, + { + "epoch": 0.07, + "learning_rate": 3.986282678330316e-05, + "loss": 1.3781, + "step": 315 + }, + { + "epoch": 0.07, + "learning_rate": 3.9861228961624146e-05, + "loss": 1.2815, + "step": 316 + }, + { + "epoch": 0.07, + "learning_rate": 3.985962192027034e-05, + "loss": 1.3893, + "step": 317 + }, + { + "epoch": 0.07, + "learning_rate": 3.985800565998775e-05, + "loss": 1.3448, + "step": 318 + }, + { + "epoch": 0.07, + "learning_rate": 3.9856380181526634e-05, + "loss": 1.3941, + "step": 319 + }, + { + "epoch": 0.07, + "learning_rate": 3.9854745485641556e-05, + "loss": 1.388, + "step": 320 + }, + { + "epoch": 0.07, + "learning_rate": 3.985310157309135e-05, + "loss": 1.3739, + "step": 321 + }, + { + "epoch": 0.07, + "learning_rate": 3.985144844463913e-05, + "loss": 1.3588, + "step": 322 + }, + { + "epoch": 0.07, + "learning_rate": 3.9849786101052285e-05, + "loss": 1.3605, + "step": 323 + }, + { + "epoch": 0.07, + "learning_rate": 3.984811454310248e-05, + "loss": 1.4181, + "step": 324 + }, + { + "epoch": 0.07, + "learning_rate": 3.9846433771565655e-05, + "loss": 1.3938, + "step": 325 + }, + { + "epoch": 0.07, + "learning_rate": 3.9844743787222046e-05, + "loss": 1.3191, + "step": 326 + }, + { + "epoch": 0.07, + "learning_rate": 3.984304459085614e-05, + "loss": 1.3999, + "step": 327 + }, + { + "epoch": 0.07, + "learning_rate": 3.984133618325671e-05, + "loss": 1.3927, + "step": 328 + }, + { + "epoch": 0.07, + "learning_rate": 3.983961856521682e-05, + "loss": 1.4153, + "step": 329 + }, + { + "epoch": 0.07, + "learning_rate": 3.983789173753378e-05, + "loss": 1.3608, + "step": 330 + }, + { + "epoch": 0.07, + "learning_rate": 3.983615570100921e-05, + "loss": 1.3745, + "step": 331 + }, + { + "epoch": 0.07, + "learning_rate": 3.9834410456448966e-05, + "loss": 1.3283, + "step": 332 + }, + { + "epoch": 0.07, + "learning_rate": 3.983265600466321e-05, + "loss": 1.3819, + "step": 333 + }, + { + "epoch": 0.07, + "learning_rate": 3.983089234646637e-05, + "loss": 1.3575, + "step": 334 + }, + { + "epoch": 0.07, + "learning_rate": 3.9829119482677144e-05, + "loss": 1.3852, + "step": 335 + }, + { + "epoch": 0.07, + "learning_rate": 3.9827337414118486e-05, + "loss": 1.3329, + "step": 336 + }, + { + "epoch": 0.07, + "learning_rate": 3.982554614161766e-05, + "loss": 1.3351, + "step": 337 + }, + { + "epoch": 0.07, + "learning_rate": 3.9823745666006176e-05, + "loss": 1.3919, + "step": 338 + }, + { + "epoch": 0.07, + "learning_rate": 3.982193598811983e-05, + "loss": 1.3631, + "step": 339 + }, + { + "epoch": 0.07, + "learning_rate": 3.9820117108798666e-05, + "loss": 1.3529, + "step": 340 + }, + { + "epoch": 0.07, + "learning_rate": 3.981828902888704e-05, + "loss": 1.3448, + "step": 341 + }, + { + "epoch": 0.07, + "learning_rate": 3.981645174923353e-05, + "loss": 1.3787, + "step": 342 + }, + { + "epoch": 0.07, + "learning_rate": 3.9814605270691025e-05, + "loss": 1.3678, + "step": 343 + }, + { + "epoch": 0.07, + "learning_rate": 3.981274959411667e-05, + "loss": 1.3527, + "step": 344 + }, + { + "epoch": 0.07, + "learning_rate": 3.9810884720371874e-05, + "loss": 1.3531, + "step": 345 + }, + { + "epoch": 0.07, + "learning_rate": 3.980901065032232e-05, + "loss": 1.3508, + "step": 346 + }, + { + "epoch": 0.07, + "learning_rate": 3.9807127384837955e-05, + "loss": 1.3343, + "step": 347 + }, + { + "epoch": 0.07, + "learning_rate": 3.980523492479301e-05, + "loss": 1.374, + "step": 348 + }, + { + "epoch": 0.07, + "learning_rate": 3.980333327106596e-05, + "loss": 1.326, + "step": 349 + }, + { + "epoch": 0.07, + "learning_rate": 3.980142242453958e-05, + "loss": 1.326, + "step": 350 + }, + { + "epoch": 0.07, + "learning_rate": 3.979950238610088e-05, + "loss": 1.2934, + "step": 351 + }, + { + "epoch": 0.07, + "learning_rate": 3.9797573156641165e-05, + "loss": 1.3524, + "step": 352 + }, + { + "epoch": 0.07, + "learning_rate": 3.9795634737055974e-05, + "loss": 1.3736, + "step": 353 + }, + { + "epoch": 0.07, + "learning_rate": 3.979368712824514e-05, + "loss": 1.3553, + "step": 354 + }, + { + "epoch": 0.07, + "learning_rate": 3.979173033111275e-05, + "loss": 1.337, + "step": 355 + }, + { + "epoch": 0.07, + "learning_rate": 3.9789764346567154e-05, + "loss": 1.3571, + "step": 356 + }, + { + "epoch": 0.08, + "learning_rate": 3.978778917552099e-05, + "loss": 1.3803, + "step": 357 + }, + { + "epoch": 0.08, + "learning_rate": 3.9785804818891117e-05, + "loss": 1.3336, + "step": 358 + }, + { + "epoch": 0.08, + "learning_rate": 3.978381127759869e-05, + "loss": 1.3586, + "step": 359 + }, + { + "epoch": 0.08, + "learning_rate": 3.9781808552569134e-05, + "loss": 1.3318, + "step": 360 + }, + { + "epoch": 0.08, + "learning_rate": 3.97797966447321e-05, + "loss": 1.4023, + "step": 361 + }, + { + "epoch": 0.08, + "learning_rate": 3.977777555502155e-05, + "loss": 1.3615, + "step": 362 + }, + { + "epoch": 0.08, + "learning_rate": 3.977574528437567e-05, + "loss": 1.3628, + "step": 363 + }, + { + "epoch": 0.08, + "learning_rate": 3.977370583373692e-05, + "loss": 1.3542, + "step": 364 + }, + { + "epoch": 0.08, + "learning_rate": 3.9771657204052026e-05, + "loss": 1.3581, + "step": 365 + }, + { + "epoch": 0.08, + "learning_rate": 3.976959939627196e-05, + "loss": 1.3722, + "step": 366 + }, + { + "epoch": 0.08, + "learning_rate": 3.9767532411351985e-05, + "loss": 1.3393, + "step": 367 + }, + { + "epoch": 0.08, + "learning_rate": 3.97654562502516e-05, + "loss": 1.4009, + "step": 368 + }, + { + "epoch": 0.08, + "learning_rate": 3.9763370913934554e-05, + "loss": 1.3111, + "step": 369 + }, + { + "epoch": 0.08, + "learning_rate": 3.976127640336889e-05, + "loss": 1.3528, + "step": 370 + }, + { + "epoch": 0.08, + "learning_rate": 3.9759172719526876e-05, + "loss": 1.3229, + "step": 371 + }, + { + "epoch": 0.08, + "learning_rate": 3.975705986338505e-05, + "loss": 1.3766, + "step": 372 + }, + { + "epoch": 0.08, + "learning_rate": 3.9754937835924214e-05, + "loss": 1.3366, + "step": 373 + }, + { + "epoch": 0.08, + "learning_rate": 3.9752806638129435e-05, + "loss": 1.2942, + "step": 374 + }, + { + "epoch": 0.08, + "learning_rate": 3.975066627099e-05, + "loss": 1.3489, + "step": 375 + }, + { + "epoch": 0.08, + "learning_rate": 3.974851673549951e-05, + "loss": 1.318, + "step": 376 + }, + { + "epoch": 0.08, + "learning_rate": 3.9746358032655764e-05, + "loss": 1.3541, + "step": 377 + }, + { + "epoch": 0.08, + "learning_rate": 3.974419016346084e-05, + "loss": 1.3754, + "step": 378 + }, + { + "epoch": 0.08, + "learning_rate": 3.974201312892109e-05, + "loss": 1.379, + "step": 379 + }, + { + "epoch": 0.08, + "learning_rate": 3.9739826930047095e-05, + "loss": 1.3513, + "step": 380 + }, + { + "epoch": 0.08, + "learning_rate": 3.9737631567853695e-05, + "loss": 1.3605, + "step": 381 + }, + { + "epoch": 0.08, + "learning_rate": 3.9735427043359996e-05, + "loss": 1.3699, + "step": 382 + }, + { + "epoch": 0.08, + "learning_rate": 3.973321335758934e-05, + "loss": 1.3238, + "step": 383 + }, + { + "epoch": 0.08, + "learning_rate": 3.973099051156934e-05, + "loss": 1.3938, + "step": 384 + }, + { + "epoch": 0.08, + "learning_rate": 3.972875850633184e-05, + "loss": 1.3824, + "step": 385 + }, + { + "epoch": 0.08, + "learning_rate": 3.9726517342912954e-05, + "loss": 1.3792, + "step": 386 + }, + { + "epoch": 0.08, + "learning_rate": 3.972426702235304e-05, + "loss": 1.3718, + "step": 387 + }, + { + "epoch": 0.08, + "learning_rate": 3.972200754569671e-05, + "loss": 1.3514, + "step": 388 + }, + { + "epoch": 0.08, + "learning_rate": 3.9719738913992815e-05, + "loss": 1.3538, + "step": 389 + }, + { + "epoch": 0.08, + "learning_rate": 3.971746112829447e-05, + "loss": 1.3107, + "step": 390 + }, + { + "epoch": 0.08, + "learning_rate": 3.9715174189659036e-05, + "loss": 1.329, + "step": 391 + }, + { + "epoch": 0.08, + "learning_rate": 3.971287809914811e-05, + "loss": 1.3611, + "step": 392 + }, + { + "epoch": 0.08, + "learning_rate": 3.971057285782757e-05, + "loss": 1.3872, + "step": 393 + }, + { + "epoch": 0.08, + "learning_rate": 3.970825846676749e-05, + "loss": 1.3523, + "step": 394 + }, + { + "epoch": 0.08, + "learning_rate": 3.970593492704225e-05, + "loss": 1.3117, + "step": 395 + }, + { + "epoch": 0.08, + "learning_rate": 3.9703602239730425e-05, + "loss": 1.3709, + "step": 396 + }, + { + "epoch": 0.08, + "learning_rate": 3.9701260405914874e-05, + "loss": 1.3922, + "step": 397 + }, + { + "epoch": 0.08, + "learning_rate": 3.9698909426682674e-05, + "loss": 1.3415, + "step": 398 + }, + { + "epoch": 0.08, + "learning_rate": 3.9696549303125176e-05, + "loss": 1.3688, + "step": 399 + }, + { + "epoch": 0.08, + "learning_rate": 3.969418003633795e-05, + "loss": 1.3396, + "step": 400 + }, + { + "epoch": 0.08, + "learning_rate": 3.969180162742082e-05, + "loss": 1.3672, + "step": 401 + }, + { + "epoch": 0.08, + "learning_rate": 3.9689414077477865e-05, + "loss": 1.3191, + "step": 402 + }, + { + "epoch": 0.08, + "learning_rate": 3.968701738761739e-05, + "loss": 1.2855, + "step": 403 + }, + { + "epoch": 0.08, + "learning_rate": 3.968461155895194e-05, + "loss": 1.3558, + "step": 404 + }, + { + "epoch": 0.09, + "learning_rate": 3.9682196592598324e-05, + "loss": 1.3334, + "step": 405 + }, + { + "epoch": 0.09, + "learning_rate": 3.967977248967758e-05, + "loss": 1.3646, + "step": 406 + }, + { + "epoch": 0.09, + "learning_rate": 3.967733925131498e-05, + "loss": 1.3379, + "step": 407 + }, + { + "epoch": 0.09, + "learning_rate": 3.9674896878640054e-05, + "loss": 1.3785, + "step": 408 + }, + { + "epoch": 0.09, + "learning_rate": 3.9672445372786565e-05, + "loss": 1.287, + "step": 409 + }, + { + "epoch": 0.09, + "learning_rate": 3.96699847348925e-05, + "loss": 1.3839, + "step": 410 + }, + { + "epoch": 0.09, + "learning_rate": 3.966751496610011e-05, + "loss": 1.3697, + "step": 411 + }, + { + "epoch": 0.09, + "learning_rate": 3.966503606755586e-05, + "loss": 1.3358, + "step": 412 + }, + { + "epoch": 0.09, + "learning_rate": 3.9662548040410485e-05, + "loss": 1.3074, + "step": 413 + }, + { + "epoch": 0.09, + "learning_rate": 3.9660050885818925e-05, + "loss": 1.3352, + "step": 414 + }, + { + "epoch": 0.09, + "learning_rate": 3.965754460494037e-05, + "loss": 1.2467, + "step": 415 + }, + { + "epoch": 0.09, + "learning_rate": 3.9655029198938256e-05, + "loss": 1.3878, + "step": 416 + }, + { + "epoch": 0.09, + "learning_rate": 3.965250466898024e-05, + "loss": 1.3401, + "step": 417 + }, + { + "epoch": 0.09, + "learning_rate": 3.964997101623823e-05, + "loss": 1.3288, + "step": 418 + }, + { + "epoch": 0.09, + "learning_rate": 3.964742824188834e-05, + "loss": 1.2528, + "step": 419 + }, + { + "epoch": 0.09, + "learning_rate": 3.9644876347110956e-05, + "loss": 1.3386, + "step": 420 + }, + { + "epoch": 0.09, + "learning_rate": 3.964231533309067e-05, + "loss": 1.3433, + "step": 421 + }, + { + "epoch": 0.09, + "learning_rate": 3.963974520101632e-05, + "loss": 1.3288, + "step": 422 + }, + { + "epoch": 0.09, + "learning_rate": 3.963716595208098e-05, + "loss": 1.3128, + "step": 423 + }, + { + "epoch": 0.09, + "learning_rate": 3.963457758748193e-05, + "loss": 1.3677, + "step": 424 + }, + { + "epoch": 0.09, + "learning_rate": 3.963198010842073e-05, + "loss": 1.3343, + "step": 425 + }, + { + "epoch": 0.09, + "learning_rate": 3.9629373516103114e-05, + "loss": 1.325, + "step": 426 + }, + { + "epoch": 0.09, + "learning_rate": 3.9626757811739084e-05, + "loss": 1.3301, + "step": 427 + }, + { + "epoch": 0.09, + "learning_rate": 3.962413299654286e-05, + "loss": 1.3561, + "step": 428 + }, + { + "epoch": 0.09, + "learning_rate": 3.962149907173291e-05, + "loss": 1.3344, + "step": 429 + }, + { + "epoch": 0.09, + "learning_rate": 3.961885603853189e-05, + "loss": 1.3034, + "step": 430 + }, + { + "epoch": 0.09, + "learning_rate": 3.9616203898166724e-05, + "loss": 1.3253, + "step": 431 + }, + { + "epoch": 0.09, + "learning_rate": 3.961354265186854e-05, + "loss": 1.3421, + "step": 432 + }, + { + "epoch": 0.09, + "learning_rate": 3.9610872300872704e-05, + "loss": 1.3792, + "step": 433 + }, + { + "epoch": 0.09, + "learning_rate": 3.96081928464188e-05, + "loss": 1.4123, + "step": 434 + }, + { + "epoch": 0.09, + "learning_rate": 3.960550428975066e-05, + "loss": 1.3507, + "step": 435 + }, + { + "epoch": 0.09, + "learning_rate": 3.9602806632116304e-05, + "loss": 1.362, + "step": 436 + }, + { + "epoch": 0.09, + "learning_rate": 3.960009987476801e-05, + "loss": 1.3853, + "step": 437 + }, + { + "epoch": 0.09, + "learning_rate": 3.959738401896227e-05, + "loss": 1.3272, + "step": 438 + }, + { + "epoch": 0.09, + "learning_rate": 3.9594659065959774e-05, + "loss": 1.3299, + "step": 439 + }, + { + "epoch": 0.09, + "learning_rate": 3.959192501702548e-05, + "loss": 1.3923, + "step": 440 + }, + { + "epoch": 0.09, + "learning_rate": 3.958918187342855e-05, + "loss": 1.3553, + "step": 441 + }, + { + "epoch": 0.09, + "learning_rate": 3.9586429636442346e-05, + "loss": 1.3334, + "step": 442 + }, + { + "epoch": 0.09, + "learning_rate": 3.958366830734448e-05, + "loss": 1.3455, + "step": 443 + }, + { + "epoch": 0.09, + "learning_rate": 3.958089788741677e-05, + "loss": 1.3692, + "step": 444 + }, + { + "epoch": 0.09, + "learning_rate": 3.957811837794526e-05, + "loss": 1.3555, + "step": 445 + }, + { + "epoch": 0.09, + "learning_rate": 3.9575329780220215e-05, + "loss": 1.2919, + "step": 446 + }, + { + "epoch": 0.09, + "learning_rate": 3.957253209553611e-05, + "loss": 1.3158, + "step": 447 + }, + { + "epoch": 0.09, + "learning_rate": 3.956972532519164e-05, + "loss": 1.2939, + "step": 448 + }, + { + "epoch": 0.09, + "learning_rate": 3.956690947048972e-05, + "loss": 1.392, + "step": 449 + }, + { + "epoch": 0.09, + "learning_rate": 3.9564084532737495e-05, + "loss": 1.3587, + "step": 450 + }, + { + "epoch": 0.09, + "learning_rate": 3.9561250513246306e-05, + "loss": 1.3468, + "step": 451 + }, + { + "epoch": 0.1, + "learning_rate": 3.955840741333171e-05, + "loss": 1.3749, + "step": 452 + }, + { + "epoch": 0.1, + "learning_rate": 3.9555555234313506e-05, + "loss": 1.3428, + "step": 453 + }, + { + "epoch": 0.1, + "learning_rate": 3.9552693977515675e-05, + "loss": 1.359, + "step": 454 + }, + { + "epoch": 0.1, + "learning_rate": 3.9549823644266434e-05, + "loss": 1.4116, + "step": 455 + }, + { + "epoch": 0.1, + "learning_rate": 3.9546944235898194e-05, + "loss": 1.2855, + "step": 456 + }, + { + "epoch": 0.1, + "learning_rate": 3.954405575374759e-05, + "loss": 1.3296, + "step": 457 + }, + { + "epoch": 0.1, + "learning_rate": 3.954115819915549e-05, + "loss": 1.3525, + "step": 458 + }, + { + "epoch": 0.1, + "learning_rate": 3.9538251573466926e-05, + "loss": 1.3051, + "step": 459 + }, + { + "epoch": 0.1, + "learning_rate": 3.9535335878031185e-05, + "loss": 1.3067, + "step": 460 + }, + { + "epoch": 0.1, + "learning_rate": 3.953241111420174e-05, + "loss": 1.3688, + "step": 461 + }, + { + "epoch": 0.1, + "learning_rate": 3.9529477283336274e-05, + "loss": 1.3524, + "step": 462 + }, + { + "epoch": 0.1, + "learning_rate": 3.9526534386796696e-05, + "loss": 1.3613, + "step": 463 + }, + { + "epoch": 0.1, + "learning_rate": 3.95235824259491e-05, + "loss": 1.3574, + "step": 464 + }, + { + "epoch": 0.1, + "learning_rate": 3.952062140216381e-05, + "loss": 1.3534, + "step": 465 + }, + { + "epoch": 0.1, + "learning_rate": 3.951765131681535e-05, + "loss": 1.3318, + "step": 466 + }, + { + "epoch": 0.1, + "learning_rate": 3.9514672171282435e-05, + "loss": 1.3822, + "step": 467 + }, + { + "epoch": 0.1, + "learning_rate": 3.951168396694801e-05, + "loss": 1.308, + "step": 468 + }, + { + "epoch": 0.1, + "learning_rate": 3.9508686705199196e-05, + "loss": 1.3783, + "step": 469 + }, + { + "epoch": 0.1, + "learning_rate": 3.950568038742736e-05, + "loss": 1.3454, + "step": 470 + }, + { + "epoch": 0.1, + "learning_rate": 3.950266501502803e-05, + "loss": 1.3656, + "step": 471 + }, + { + "epoch": 0.1, + "learning_rate": 3.9499640589400964e-05, + "loss": 1.3865, + "step": 472 + }, + { + "epoch": 0.1, + "learning_rate": 3.949660711195011e-05, + "loss": 1.3157, + "step": 473 + }, + { + "epoch": 0.1, + "learning_rate": 3.949356458408363e-05, + "loss": 1.3184, + "step": 474 + }, + { + "epoch": 0.1, + "learning_rate": 3.9490513007213874e-05, + "loss": 1.373, + "step": 475 + }, + { + "epoch": 0.1, + "learning_rate": 3.94874523827574e-05, + "loss": 1.2863, + "step": 476 + }, + { + "epoch": 0.1, + "learning_rate": 3.9484382712134956e-05, + "loss": 1.3428, + "step": 477 + }, + { + "epoch": 0.1, + "learning_rate": 3.9481303996771505e-05, + "loss": 1.3618, + "step": 478 + }, + { + "epoch": 0.1, + "learning_rate": 3.9478216238096206e-05, + "loss": 1.3291, + "step": 479 + }, + { + "epoch": 0.1, + "learning_rate": 3.94751194375424e-05, + "loss": 1.293, + "step": 480 + }, + { + "epoch": 0.1, + "learning_rate": 3.9472013596547646e-05, + "loss": 1.3879, + "step": 481 + }, + { + "epoch": 0.1, + "learning_rate": 3.946889871655368e-05, + "loss": 1.3692, + "step": 482 + }, + { + "epoch": 0.1, + "learning_rate": 3.946577479900645e-05, + "loss": 1.3788, + "step": 483 + }, + { + "epoch": 0.1, + "learning_rate": 3.9462641845356096e-05, + "loss": 1.335, + "step": 484 + }, + { + "epoch": 0.1, + "learning_rate": 3.945949985705694e-05, + "loss": 1.3058, + "step": 485 + }, + { + "epoch": 0.1, + "learning_rate": 3.945634883556752e-05, + "loss": 1.3531, + "step": 486 + }, + { + "epoch": 0.1, + "learning_rate": 3.945318878235054e-05, + "loss": 1.2844, + "step": 487 + }, + { + "epoch": 0.1, + "learning_rate": 3.945001969887293e-05, + "loss": 1.3488, + "step": 488 + }, + { + "epoch": 0.1, + "learning_rate": 3.944684158660577e-05, + "loss": 1.3626, + "step": 489 + }, + { + "epoch": 0.1, + "learning_rate": 3.944365444702437e-05, + "loss": 1.3428, + "step": 490 + }, + { + "epoch": 0.1, + "learning_rate": 3.944045828160822e-05, + "loss": 1.3372, + "step": 491 + }, + { + "epoch": 0.1, + "learning_rate": 3.943725309184098e-05, + "loss": 1.3614, + "step": 492 + }, + { + "epoch": 0.1, + "learning_rate": 3.943403887921052e-05, + "loss": 1.3507, + "step": 493 + }, + { + "epoch": 0.1, + "learning_rate": 3.94308156452089e-05, + "loss": 1.3577, + "step": 494 + }, + { + "epoch": 0.1, + "learning_rate": 3.9427583391332354e-05, + "loss": 1.3446, + "step": 495 + }, + { + "epoch": 0.1, + "learning_rate": 3.94243421190813e-05, + "loss": 1.3446, + "step": 496 + }, + { + "epoch": 0.1, + "learning_rate": 3.9421091829960364e-05, + "loss": 1.3453, + "step": 497 + }, + { + "epoch": 0.1, + "learning_rate": 3.9417832525478344e-05, + "loss": 1.3483, + "step": 498 + }, + { + "epoch": 0.1, + "learning_rate": 3.941456420714822e-05, + "loss": 1.3611, + "step": 499 + }, + { + "epoch": 0.11, + "learning_rate": 3.941128687648717e-05, + "loss": 1.332, + "step": 500 + }, + { + "epoch": 0.11, + "learning_rate": 3.940800053501653e-05, + "loss": 1.3141, + "step": 501 + }, + { + "epoch": 0.11, + "learning_rate": 3.9404705184261846e-05, + "loss": 1.3314, + "step": 502 + }, + { + "epoch": 0.11, + "learning_rate": 3.9401400825752835e-05, + "loss": 1.3375, + "step": 503 + }, + { + "epoch": 0.11, + "learning_rate": 3.939808746102339e-05, + "loss": 1.3588, + "step": 504 + }, + { + "epoch": 0.11, + "learning_rate": 3.9394765091611596e-05, + "loss": 1.3301, + "step": 505 + }, + { + "epoch": 0.11, + "learning_rate": 3.939143371905971e-05, + "loss": 1.323, + "step": 506 + }, + { + "epoch": 0.11, + "learning_rate": 3.938809334491417e-05, + "loss": 1.3438, + "step": 507 + }, + { + "epoch": 0.11, + "learning_rate": 3.9384743970725596e-05, + "loss": 1.3791, + "step": 508 + }, + { + "epoch": 0.11, + "learning_rate": 3.938138559804878e-05, + "loss": 1.3477, + "step": 509 + }, + { + "epoch": 0.11, + "learning_rate": 3.9378018228442696e-05, + "loss": 1.3494, + "step": 510 + }, + { + "epoch": 0.11, + "learning_rate": 3.937464186347049e-05, + "loss": 1.2981, + "step": 511 + }, + { + "epoch": 0.11, + "learning_rate": 3.9371256504699486e-05, + "loss": 1.3292, + "step": 512 + }, + { + "epoch": 0.11, + "learning_rate": 3.936786215370119e-05, + "loss": 1.3768, + "step": 513 + }, + { + "epoch": 0.11, + "learning_rate": 3.936445881205127e-05, + "loss": 1.3215, + "step": 514 + }, + { + "epoch": 0.11, + "learning_rate": 3.936104648132957e-05, + "loss": 1.3193, + "step": 515 + }, + { + "epoch": 0.11, + "learning_rate": 3.935762516312012e-05, + "loss": 1.3531, + "step": 516 + }, + { + "epoch": 0.11, + "learning_rate": 3.9354194859011105e-05, + "loss": 1.3579, + "step": 517 + }, + { + "epoch": 0.11, + "learning_rate": 3.935075557059488e-05, + "loss": 1.3258, + "step": 518 + }, + { + "epoch": 0.11, + "learning_rate": 3.934730729946799e-05, + "loss": 1.3648, + "step": 519 + }, + { + "epoch": 0.11, + "learning_rate": 3.9343850047231144e-05, + "loss": 1.2782, + "step": 520 + }, + { + "epoch": 0.11, + "learning_rate": 3.9340383815489204e-05, + "loss": 1.3193, + "step": 521 + }, + { + "epoch": 0.11, + "learning_rate": 3.933690860585121e-05, + "loss": 1.4034, + "step": 522 + }, + { + "epoch": 0.11, + "learning_rate": 3.933342441993037e-05, + "loss": 1.3484, + "step": 523 + }, + { + "epoch": 0.11, + "learning_rate": 3.932993125934407e-05, + "loss": 1.3101, + "step": 524 + }, + { + "epoch": 0.11, + "learning_rate": 3.932642912571385e-05, + "loss": 1.3515, + "step": 525 + }, + { + "epoch": 0.11, + "learning_rate": 3.932291802066539e-05, + "loss": 1.3277, + "step": 526 + }, + { + "epoch": 0.11, + "learning_rate": 3.93193979458286e-05, + "loss": 1.3594, + "step": 527 + }, + { + "epoch": 0.11, + "learning_rate": 3.93158689028375e-05, + "loss": 1.329, + "step": 528 + }, + { + "epoch": 0.11, + "learning_rate": 3.931233089333027e-05, + "loss": 1.3146, + "step": 529 + }, + { + "epoch": 0.11, + "learning_rate": 3.9308783918949296e-05, + "loss": 1.3698, + "step": 530 + }, + { + "epoch": 0.11, + "learning_rate": 3.9305227981341085e-05, + "loss": 1.349, + "step": 531 + }, + { + "epoch": 0.11, + "learning_rate": 3.930166308215633e-05, + "loss": 1.3225, + "step": 532 + }, + { + "epoch": 0.11, + "learning_rate": 3.929808922304987e-05, + "loss": 1.3138, + "step": 533 + }, + { + "epoch": 0.11, + "learning_rate": 3.92945064056807e-05, + "loss": 1.3343, + "step": 534 + }, + { + "epoch": 0.11, + "learning_rate": 3.929091463171199e-05, + "loss": 1.3186, + "step": 535 + }, + { + "epoch": 0.11, + "learning_rate": 3.928731390281105e-05, + "loss": 1.3135, + "step": 536 + }, + { + "epoch": 0.11, + "learning_rate": 3.928370422064936e-05, + "loss": 1.3425, + "step": 537 + }, + { + "epoch": 0.11, + "learning_rate": 3.928008558690255e-05, + "loss": 1.3065, + "step": 538 + }, + { + "epoch": 0.11, + "learning_rate": 3.927645800325041e-05, + "loss": 1.3098, + "step": 539 + }, + { + "epoch": 0.11, + "learning_rate": 3.927282147137688e-05, + "loss": 1.3559, + "step": 540 + }, + { + "epoch": 0.11, + "learning_rate": 3.9269175992970055e-05, + "loss": 1.334, + "step": 541 + }, + { + "epoch": 0.11, + "learning_rate": 3.9265521569722176e-05, + "loss": 1.3478, + "step": 542 + }, + { + "epoch": 0.11, + "learning_rate": 3.926185820332965e-05, + "loss": 1.3672, + "step": 543 + }, + { + "epoch": 0.11, + "learning_rate": 3.9258185895493026e-05, + "loss": 1.3822, + "step": 544 + }, + { + "epoch": 0.11, + "learning_rate": 3.925450464791701e-05, + "loss": 1.3705, + "step": 545 + }, + { + "epoch": 0.11, + "learning_rate": 3.925081446231045e-05, + "loss": 1.3478, + "step": 546 + }, + { + "epoch": 0.12, + "learning_rate": 3.924711534038635e-05, + "loss": 1.3549, + "step": 547 + }, + { + "epoch": 0.12, + "learning_rate": 3.9243407283861866e-05, + "loss": 1.2949, + "step": 548 + }, + { + "epoch": 0.12, + "learning_rate": 3.923969029445828e-05, + "loss": 1.3545, + "step": 549 + }, + { + "epoch": 0.12, + "learning_rate": 3.923596437390105e-05, + "loss": 1.355, + "step": 550 + }, + { + "epoch": 0.12, + "learning_rate": 3.923222952391975e-05, + "loss": 1.2885, + "step": 551 + }, + { + "epoch": 0.12, + "learning_rate": 3.9228485746248134e-05, + "loss": 1.3509, + "step": 552 + }, + { + "epoch": 0.12, + "learning_rate": 3.922473304262406e-05, + "loss": 1.3077, + "step": 553 + }, + { + "epoch": 0.12, + "learning_rate": 3.922097141478957e-05, + "loss": 1.3247, + "step": 554 + }, + { + "epoch": 0.12, + "learning_rate": 3.921720086449082e-05, + "loss": 1.3522, + "step": 555 + }, + { + "epoch": 0.12, + "learning_rate": 3.921342139347811e-05, + "loss": 1.354, + "step": 556 + }, + { + "epoch": 0.12, + "learning_rate": 3.92096330035059e-05, + "loss": 1.3082, + "step": 557 + }, + { + "epoch": 0.12, + "learning_rate": 3.9205835696332775e-05, + "loss": 1.358, + "step": 558 + }, + { + "epoch": 0.12, + "learning_rate": 3.920202947372146e-05, + "loss": 1.3389, + "step": 559 + }, + { + "epoch": 0.12, + "learning_rate": 3.919821433743882e-05, + "loss": 1.3166, + "step": 560 + }, + { + "epoch": 0.12, + "learning_rate": 3.919439028925587e-05, + "loss": 1.3377, + "step": 561 + }, + { + "epoch": 0.12, + "learning_rate": 3.919055733094774e-05, + "loss": 1.3287, + "step": 562 + }, + { + "epoch": 0.12, + "learning_rate": 3.91867154642937e-05, + "loss": 1.3511, + "step": 563 + }, + { + "epoch": 0.12, + "learning_rate": 3.918286469107718e-05, + "loss": 1.3573, + "step": 564 + }, + { + "epoch": 0.12, + "learning_rate": 3.917900501308572e-05, + "loss": 1.3141, + "step": 565 + }, + { + "epoch": 0.12, + "learning_rate": 3.9175136432111e-05, + "loss": 1.3611, + "step": 566 + }, + { + "epoch": 0.12, + "learning_rate": 3.9171258949948827e-05, + "loss": 1.3282, + "step": 567 + }, + { + "epoch": 0.12, + "learning_rate": 3.916737256839916e-05, + "loss": 1.3067, + "step": 568 + }, + { + "epoch": 0.12, + "learning_rate": 3.916347728926606e-05, + "loss": 1.3499, + "step": 569 + }, + { + "epoch": 0.12, + "learning_rate": 3.915957311435774e-05, + "loss": 1.2912, + "step": 570 + }, + { + "epoch": 0.12, + "learning_rate": 3.915566004548654e-05, + "loss": 1.3769, + "step": 571 + }, + { + "epoch": 0.12, + "learning_rate": 3.915173808446892e-05, + "loss": 1.2684, + "step": 572 + }, + { + "epoch": 0.12, + "learning_rate": 3.914780723312548e-05, + "loss": 1.3661, + "step": 573 + }, + { + "epoch": 0.12, + "learning_rate": 3.914386749328093e-05, + "loss": 1.3749, + "step": 574 + }, + { + "epoch": 0.12, + "learning_rate": 3.913991886676412e-05, + "loss": 1.3324, + "step": 575 + }, + { + "epoch": 0.12, + "learning_rate": 3.9135961355408024e-05, + "loss": 1.2841, + "step": 576 + }, + { + "epoch": 0.12, + "learning_rate": 3.913199496104972e-05, + "loss": 1.3472, + "step": 577 + }, + { + "epoch": 0.12, + "learning_rate": 3.912801968553045e-05, + "loss": 1.317, + "step": 578 + }, + { + "epoch": 0.12, + "learning_rate": 3.9124035530695546e-05, + "loss": 1.3709, + "step": 579 + }, + { + "epoch": 0.12, + "learning_rate": 3.912004249839447e-05, + "loss": 1.3045, + "step": 580 + }, + { + "epoch": 0.12, + "learning_rate": 3.91160405904808e-05, + "loss": 1.3511, + "step": 581 + }, + { + "epoch": 0.12, + "learning_rate": 3.911202980881226e-05, + "loss": 1.3596, + "step": 582 + }, + { + "epoch": 0.12, + "learning_rate": 3.910801015525064e-05, + "loss": 1.3468, + "step": 583 + }, + { + "epoch": 0.12, + "learning_rate": 3.910398163166192e-05, + "loss": 1.3168, + "step": 584 + }, + { + "epoch": 0.12, + "learning_rate": 3.909994423991614e-05, + "loss": 1.3635, + "step": 585 + }, + { + "epoch": 0.12, + "learning_rate": 3.909589798188747e-05, + "loss": 1.3195, + "step": 586 + }, + { + "epoch": 0.12, + "learning_rate": 3.909184285945421e-05, + "loss": 1.3587, + "step": 587 + }, + { + "epoch": 0.12, + "learning_rate": 3.908777887449877e-05, + "loss": 1.3586, + "step": 588 + }, + { + "epoch": 0.12, + "learning_rate": 3.9083706028907665e-05, + "loss": 1.3146, + "step": 589 + }, + { + "epoch": 0.12, + "learning_rate": 3.9079624324571536e-05, + "loss": 1.3445, + "step": 590 + }, + { + "epoch": 0.12, + "learning_rate": 3.9075533763385116e-05, + "loss": 1.4044, + "step": 591 + }, + { + "epoch": 0.12, + "learning_rate": 3.9071434347247275e-05, + "loss": 1.3535, + "step": 592 + }, + { + "epoch": 0.12, + "learning_rate": 3.906732607806098e-05, + "loss": 1.2993, + "step": 593 + }, + { + "epoch": 0.12, + "learning_rate": 3.906320895773329e-05, + "loss": 1.3229, + "step": 594 + }, + { + "epoch": 0.13, + "learning_rate": 3.905908298817543e-05, + "loss": 1.353, + "step": 595 + }, + { + "epoch": 0.13, + "learning_rate": 3.905494817130265e-05, + "loss": 1.3275, + "step": 596 + }, + { + "epoch": 0.13, + "learning_rate": 3.9050804509034383e-05, + "loss": 1.3252, + "step": 597 + }, + { + "epoch": 0.13, + "learning_rate": 3.904665200329411e-05, + "loss": 1.3534, + "step": 598 + }, + { + "epoch": 0.13, + "learning_rate": 3.904249065600948e-05, + "loss": 1.365, + "step": 599 + }, + { + "epoch": 0.13, + "learning_rate": 3.903832046911218e-05, + "loss": 1.3479, + "step": 600 + }, + { + "epoch": 0.13, + "learning_rate": 3.9034141444538034e-05, + "loss": 1.3279, + "step": 601 + }, + { + "epoch": 0.13, + "learning_rate": 3.902995358422697e-05, + "loss": 1.3512, + "step": 602 + }, + { + "epoch": 0.13, + "learning_rate": 3.902575689012301e-05, + "loss": 1.332, + "step": 603 + }, + { + "epoch": 0.13, + "learning_rate": 3.9021551364174286e-05, + "loss": 1.3107, + "step": 604 + }, + { + "epoch": 0.13, + "learning_rate": 3.901733700833301e-05, + "loss": 1.2952, + "step": 605 + }, + { + "epoch": 0.13, + "learning_rate": 3.9013113824555515e-05, + "loss": 1.2863, + "step": 606 + }, + { + "epoch": 0.13, + "learning_rate": 3.9008881814802225e-05, + "loss": 1.3365, + "step": 607 + }, + { + "epoch": 0.13, + "learning_rate": 3.900464098103765e-05, + "loss": 1.348, + "step": 608 + }, + { + "epoch": 0.13, + "learning_rate": 3.9000391325230405e-05, + "loss": 1.3923, + "step": 609 + }, + { + "epoch": 0.13, + "learning_rate": 3.899613284935321e-05, + "loss": 1.3236, + "step": 610 + }, + { + "epoch": 0.13, + "learning_rate": 3.899186555538286e-05, + "loss": 1.346, + "step": 611 + }, + { + "epoch": 0.13, + "learning_rate": 3.898758944530025e-05, + "loss": 1.3396, + "step": 612 + }, + { + "epoch": 0.13, + "learning_rate": 3.898330452109038e-05, + "loss": 1.3161, + "step": 613 + }, + { + "epoch": 0.13, + "learning_rate": 3.897901078474233e-05, + "loss": 1.3269, + "step": 614 + }, + { + "epoch": 0.13, + "learning_rate": 3.897470823824927e-05, + "loss": 1.3353, + "step": 615 + }, + { + "epoch": 0.13, + "learning_rate": 3.897039688360845e-05, + "loss": 1.3457, + "step": 616 + }, + { + "epoch": 0.13, + "learning_rate": 3.8966076722821245e-05, + "loss": 1.393, + "step": 617 + }, + { + "epoch": 0.13, + "learning_rate": 3.8961747757893075e-05, + "loss": 1.3907, + "step": 618 + }, + { + "epoch": 0.13, + "learning_rate": 3.895740999083347e-05, + "loss": 1.3242, + "step": 619 + }, + { + "epoch": 0.13, + "learning_rate": 3.8953063423656055e-05, + "loss": 1.2941, + "step": 620 + }, + { + "epoch": 0.13, + "learning_rate": 3.8948708058378504e-05, + "loss": 1.3333, + "step": 621 + }, + { + "epoch": 0.13, + "learning_rate": 3.894434389702261e-05, + "loss": 1.2923, + "step": 622 + }, + { + "epoch": 0.13, + "learning_rate": 3.8939970941614247e-05, + "loss": 1.3301, + "step": 623 + }, + { + "epoch": 0.13, + "learning_rate": 3.893558919418334e-05, + "loss": 1.332, + "step": 624 + }, + { + "epoch": 0.13, + "learning_rate": 3.893119865676393e-05, + "loss": 1.3211, + "step": 625 + }, + { + "epoch": 0.13, + "learning_rate": 3.892679933139412e-05, + "loss": 1.3203, + "step": 626 + }, + { + "epoch": 0.13, + "learning_rate": 3.8922391220116094e-05, + "loss": 1.3262, + "step": 627 + }, + { + "epoch": 0.13, + "learning_rate": 3.891797432497613e-05, + "loss": 1.325, + "step": 628 + }, + { + "epoch": 0.13, + "learning_rate": 3.891354864802455e-05, + "loss": 1.3373, + "step": 629 + }, + { + "epoch": 0.13, + "learning_rate": 3.89091141913158e-05, + "loss": 1.3773, + "step": 630 + }, + { + "epoch": 0.13, + "learning_rate": 3.890467095690837e-05, + "loss": 1.3327, + "step": 631 + }, + { + "epoch": 0.13, + "learning_rate": 3.890021894686481e-05, + "loss": 1.32, + "step": 632 + }, + { + "epoch": 0.13, + "learning_rate": 3.8895758163251783e-05, + "loss": 1.3501, + "step": 633 + }, + { + "epoch": 0.13, + "learning_rate": 3.889128860814e-05, + "loss": 1.3465, + "step": 634 + }, + { + "epoch": 0.13, + "learning_rate": 3.8886810283604245e-05, + "loss": 1.3617, + "step": 635 + }, + { + "epoch": 0.13, + "learning_rate": 3.888232319172338e-05, + "loss": 1.3613, + "step": 636 + }, + { + "epoch": 0.13, + "learning_rate": 3.887782733458034e-05, + "loss": 1.313, + "step": 637 + }, + { + "epoch": 0.13, + "learning_rate": 3.887332271426211e-05, + "loss": 1.3819, + "step": 638 + }, + { + "epoch": 0.13, + "learning_rate": 3.886880933285977e-05, + "loss": 1.3359, + "step": 639 + }, + { + "epoch": 0.13, + "learning_rate": 3.886428719246845e-05, + "loss": 1.2789, + "step": 640 + }, + { + "epoch": 0.13, + "learning_rate": 3.885975629518734e-05, + "loss": 1.3289, + "step": 641 + }, + { + "epoch": 0.14, + "learning_rate": 3.8855216643119697e-05, + "loss": 1.389, + "step": 642 + }, + { + "epoch": 0.14, + "learning_rate": 3.885066823837287e-05, + "loss": 1.3445, + "step": 643 + }, + { + "epoch": 0.14, + "learning_rate": 3.884611108305824e-05, + "loss": 1.2976, + "step": 644 + }, + { + "epoch": 0.14, + "learning_rate": 3.8841545179291254e-05, + "loss": 1.3225, + "step": 645 + }, + { + "epoch": 0.14, + "learning_rate": 3.883697052919143e-05, + "loss": 1.3322, + "step": 646 + }, + { + "epoch": 0.14, + "learning_rate": 3.883238713488235e-05, + "loss": 1.3157, + "step": 647 + }, + { + "epoch": 0.14, + "learning_rate": 3.882779499849163e-05, + "loss": 1.3124, + "step": 648 + }, + { + "epoch": 0.14, + "learning_rate": 3.8823194122150975e-05, + "loss": 1.3327, + "step": 649 + }, + { + "epoch": 0.14, + "learning_rate": 3.881858450799612e-05, + "loss": 1.3617, + "step": 650 + }, + { + "epoch": 0.14, + "learning_rate": 3.8813966158166894e-05, + "loss": 1.3349, + "step": 651 + }, + { + "epoch": 0.14, + "learning_rate": 3.8809339074807125e-05, + "loss": 1.3099, + "step": 652 + }, + { + "epoch": 0.14, + "learning_rate": 3.8804703260064756e-05, + "loss": 1.3449, + "step": 653 + }, + { + "epoch": 0.14, + "learning_rate": 3.880005871609173e-05, + "loss": 1.3623, + "step": 654 + }, + { + "epoch": 0.14, + "learning_rate": 3.879540544504408e-05, + "loss": 1.3599, + "step": 655 + }, + { + "epoch": 0.14, + "learning_rate": 3.879074344908187e-05, + "loss": 1.3152, + "step": 656 + }, + { + "epoch": 0.14, + "learning_rate": 3.878607273036922e-05, + "loss": 1.3683, + "step": 657 + }, + { + "epoch": 0.14, + "learning_rate": 3.8781393291074296e-05, + "loss": 1.3504, + "step": 658 + }, + { + "epoch": 0.14, + "learning_rate": 3.8776705133369333e-05, + "loss": 1.3143, + "step": 659 + }, + { + "epoch": 0.14, + "learning_rate": 3.8772008259430575e-05, + "loss": 1.3358, + "step": 660 + }, + { + "epoch": 0.14, + "learning_rate": 3.876730267143834e-05, + "loss": 1.3297, + "step": 661 + }, + { + "epoch": 0.14, + "learning_rate": 3.876258837157699e-05, + "loss": 1.3036, + "step": 662 + }, + { + "epoch": 0.14, + "learning_rate": 3.8757865362034914e-05, + "loss": 1.3259, + "step": 663 + }, + { + "epoch": 0.14, + "learning_rate": 3.875313364500456e-05, + "loss": 1.2908, + "step": 664 + }, + { + "epoch": 0.14, + "learning_rate": 3.8748393222682425e-05, + "loss": 1.3005, + "step": 665 + }, + { + "epoch": 0.14, + "learning_rate": 3.874364409726901e-05, + "loss": 1.3045, + "step": 666 + }, + { + "epoch": 0.14, + "learning_rate": 3.87388862709689e-05, + "loss": 1.3331, + "step": 667 + }, + { + "epoch": 0.14, + "learning_rate": 3.8734119745990696e-05, + "loss": 1.3348, + "step": 668 + }, + { + "epoch": 0.14, + "learning_rate": 3.872934452454704e-05, + "loss": 1.3533, + "step": 669 + }, + { + "epoch": 0.14, + "learning_rate": 3.872456060885461e-05, + "loss": 1.288, + "step": 670 + }, + { + "epoch": 0.14, + "learning_rate": 3.8719768001134124e-05, + "loss": 1.3604, + "step": 671 + }, + { + "epoch": 0.14, + "learning_rate": 3.871496670361033e-05, + "loss": 1.3343, + "step": 672 + }, + { + "epoch": 0.14, + "learning_rate": 3.871015671851202e-05, + "loss": 1.3566, + "step": 673 + }, + { + "epoch": 0.14, + "learning_rate": 3.870533804807201e-05, + "loss": 1.354, + "step": 674 + }, + { + "epoch": 0.14, + "learning_rate": 3.870051069452714e-05, + "loss": 1.3175, + "step": 675 + }, + { + "epoch": 0.14, + "learning_rate": 3.8695674660118294e-05, + "loss": 1.3104, + "step": 676 + }, + { + "epoch": 0.14, + "learning_rate": 3.8690829947090386e-05, + "loss": 1.3424, + "step": 677 + }, + { + "epoch": 0.14, + "learning_rate": 3.868597655769235e-05, + "loss": 1.3718, + "step": 678 + }, + { + "epoch": 0.14, + "learning_rate": 3.868111449417716e-05, + "loss": 1.3294, + "step": 679 + }, + { + "epoch": 0.14, + "learning_rate": 3.867624375880179e-05, + "loss": 1.2584, + "step": 680 + }, + { + "epoch": 0.14, + "learning_rate": 3.8671364353827284e-05, + "loss": 1.369, + "step": 681 + }, + { + "epoch": 0.14, + "learning_rate": 3.8666476281518665e-05, + "loss": 1.3192, + "step": 682 + }, + { + "epoch": 0.14, + "learning_rate": 3.8661579544145e-05, + "loss": 1.3639, + "step": 683 + }, + { + "epoch": 0.14, + "learning_rate": 3.8656674143979386e-05, + "loss": 1.2207, + "step": 684 + }, + { + "epoch": 0.14, + "learning_rate": 3.8651760083298926e-05, + "loss": 1.3189, + "step": 685 + }, + { + "epoch": 0.14, + "learning_rate": 3.864683736438475e-05, + "loss": 1.3861, + "step": 686 + }, + { + "epoch": 0.14, + "learning_rate": 3.8641905989522016e-05, + "loss": 1.3206, + "step": 687 + }, + { + "epoch": 0.14, + "learning_rate": 3.863696596099988e-05, + "loss": 1.317, + "step": 688 + }, + { + "epoch": 0.14, + "learning_rate": 3.863201728111153e-05, + "loss": 1.3675, + "step": 689 + }, + { + "epoch": 0.15, + "learning_rate": 3.862705995215417e-05, + "loss": 1.3608, + "step": 690 + }, + { + "epoch": 0.15, + "learning_rate": 3.862209397642901e-05, + "loss": 1.3174, + "step": 691 + }, + { + "epoch": 0.15, + "learning_rate": 3.861711935624129e-05, + "loss": 1.3038, + "step": 692 + }, + { + "epoch": 0.15, + "learning_rate": 3.8612136093900224e-05, + "loss": 1.2865, + "step": 693 + }, + { + "epoch": 0.15, + "learning_rate": 3.860714419171909e-05, + "loss": 1.3025, + "step": 694 + }, + { + "epoch": 0.15, + "learning_rate": 3.860214365201515e-05, + "loss": 1.3254, + "step": 695 + }, + { + "epoch": 0.15, + "learning_rate": 3.8597134477109674e-05, + "loss": 1.2729, + "step": 696 + }, + { + "epoch": 0.15, + "learning_rate": 3.8592116669327945e-05, + "loss": 1.3217, + "step": 697 + }, + { + "epoch": 0.15, + "learning_rate": 3.858709023099925e-05, + "loss": 1.3261, + "step": 698 + }, + { + "epoch": 0.15, + "learning_rate": 3.858205516445689e-05, + "loss": 1.3223, + "step": 699 + }, + { + "epoch": 0.15, + "learning_rate": 3.857701147203816e-05, + "loss": 1.3326, + "step": 700 + }, + { + "epoch": 0.15, + "learning_rate": 3.857195915608437e-05, + "loss": 1.3546, + "step": 701 + }, + { + "epoch": 0.15, + "learning_rate": 3.8566898218940825e-05, + "loss": 1.344, + "step": 702 + }, + { + "epoch": 0.15, + "learning_rate": 3.856182866295684e-05, + "loss": 1.3246, + "step": 703 + }, + { + "epoch": 0.15, + "learning_rate": 3.8556750490485724e-05, + "loss": 1.3234, + "step": 704 + }, + { + "epoch": 0.15, + "learning_rate": 3.855166370388479e-05, + "loss": 1.3433, + "step": 705 + }, + { + "epoch": 0.15, + "learning_rate": 3.8546568305515345e-05, + "loss": 1.3348, + "step": 706 + }, + { + "epoch": 0.15, + "learning_rate": 3.85414642977427e-05, + "loss": 1.3573, + "step": 707 + }, + { + "epoch": 0.15, + "learning_rate": 3.8536351682936155e-05, + "loss": 1.3409, + "step": 708 + }, + { + "epoch": 0.15, + "learning_rate": 3.8531230463469015e-05, + "loss": 1.3084, + "step": 709 + }, + { + "epoch": 0.15, + "learning_rate": 3.852610064171857e-05, + "loss": 1.3788, + "step": 710 + }, + { + "epoch": 0.15, + "learning_rate": 3.85209622200661e-05, + "loss": 1.3381, + "step": 711 + }, + { + "epoch": 0.15, + "learning_rate": 3.8515815200896905e-05, + "loss": 1.3015, + "step": 712 + }, + { + "epoch": 0.15, + "learning_rate": 3.851065958660023e-05, + "loss": 1.3084, + "step": 713 + }, + { + "epoch": 0.15, + "learning_rate": 3.8505495379569354e-05, + "loss": 1.3429, + "step": 714 + }, + { + "epoch": 0.15, + "learning_rate": 3.850032258220152e-05, + "loss": 1.3328, + "step": 715 + }, + { + "epoch": 0.15, + "learning_rate": 3.849514119689796e-05, + "loss": 1.3462, + "step": 716 + }, + { + "epoch": 0.15, + "learning_rate": 3.84899512260639e-05, + "loss": 1.2932, + "step": 717 + }, + { + "epoch": 0.15, + "learning_rate": 3.848475267210856e-05, + "loss": 1.3455, + "step": 718 + }, + { + "epoch": 0.15, + "learning_rate": 3.8479545537445115e-05, + "loss": 1.348, + "step": 719 + }, + { + "epoch": 0.15, + "learning_rate": 3.847432982449075e-05, + "loss": 1.3449, + "step": 720 + }, + { + "epoch": 0.15, + "learning_rate": 3.846910553566662e-05, + "loss": 1.2975, + "step": 721 + }, + { + "epoch": 0.15, + "learning_rate": 3.846387267339787e-05, + "loss": 1.3452, + "step": 722 + }, + { + "epoch": 0.15, + "learning_rate": 3.845863124011361e-05, + "loss": 1.3705, + "step": 723 + }, + { + "epoch": 0.15, + "learning_rate": 3.845338123824694e-05, + "loss": 1.3662, + "step": 724 + }, + { + "epoch": 0.15, + "learning_rate": 3.844812267023495e-05, + "loss": 1.3419, + "step": 725 + }, + { + "epoch": 0.15, + "learning_rate": 3.8442855538518667e-05, + "loss": 1.3088, + "step": 726 + }, + { + "epoch": 0.15, + "learning_rate": 3.8437579845543133e-05, + "loss": 1.2827, + "step": 727 + }, + { + "epoch": 0.15, + "learning_rate": 3.843229559375735e-05, + "loss": 1.2962, + "step": 728 + }, + { + "epoch": 0.15, + "learning_rate": 3.842700278561429e-05, + "loss": 1.2687, + "step": 729 + }, + { + "epoch": 0.15, + "learning_rate": 3.8421701423570895e-05, + "loss": 1.2588, + "step": 730 + }, + { + "epoch": 0.15, + "learning_rate": 3.841639151008809e-05, + "loss": 1.2686, + "step": 731 + }, + { + "epoch": 0.15, + "learning_rate": 3.8411073047630745e-05, + "loss": 1.3598, + "step": 732 + }, + { + "epoch": 0.15, + "learning_rate": 3.840574603866774e-05, + "loss": 1.309, + "step": 733 + }, + { + "epoch": 0.15, + "learning_rate": 3.840041048567188e-05, + "loss": 1.3183, + "step": 734 + }, + { + "epoch": 0.15, + "learning_rate": 3.839506639111996e-05, + "loss": 1.3085, + "step": 735 + }, + { + "epoch": 0.15, + "learning_rate": 3.838971375749272e-05, + "loss": 1.315, + "step": 736 + }, + { + "epoch": 0.16, + "learning_rate": 3.83843525872749e-05, + "loss": 1.3152, + "step": 737 + }, + { + "epoch": 0.16, + "learning_rate": 3.837898288295516e-05, + "loss": 1.2853, + "step": 738 + }, + { + "epoch": 0.16, + "learning_rate": 3.837360464702616e-05, + "loss": 1.3565, + "step": 739 + }, + { + "epoch": 0.16, + "learning_rate": 3.8368217881984484e-05, + "loss": 1.2939, + "step": 740 + }, + { + "epoch": 0.16, + "learning_rate": 3.83628225903307e-05, + "loss": 1.3342, + "step": 741 + }, + { + "epoch": 0.16, + "learning_rate": 3.8357418774569335e-05, + "loss": 1.2597, + "step": 742 + }, + { + "epoch": 0.16, + "learning_rate": 3.835200643720886e-05, + "loss": 1.2928, + "step": 743 + }, + { + "epoch": 0.16, + "learning_rate": 3.8346585580761705e-05, + "loss": 1.3252, + "step": 744 + }, + { + "epoch": 0.16, + "learning_rate": 3.8341156207744254e-05, + "loss": 1.3273, + "step": 745 + }, + { + "epoch": 0.16, + "learning_rate": 3.833571832067685e-05, + "loss": 1.3518, + "step": 746 + }, + { + "epoch": 0.16, + "learning_rate": 3.8330271922083795e-05, + "loss": 1.2859, + "step": 747 + }, + { + "epoch": 0.16, + "learning_rate": 3.8324817014493326e-05, + "loss": 1.3529, + "step": 748 + }, + { + "epoch": 0.16, + "learning_rate": 3.831935360043763e-05, + "loss": 1.3298, + "step": 749 + }, + { + "epoch": 0.16, + "learning_rate": 3.8313881682452854e-05, + "loss": 1.2983, + "step": 750 + }, + { + "epoch": 0.16, + "learning_rate": 3.830840126307909e-05, + "loss": 1.3388, + "step": 751 + }, + { + "epoch": 0.16, + "learning_rate": 3.830291234486037e-05, + "loss": 1.355, + "step": 752 + }, + { + "epoch": 0.16, + "learning_rate": 3.8297414930344684e-05, + "loss": 1.3197, + "step": 753 + }, + { + "epoch": 0.16, + "learning_rate": 3.829190902208394e-05, + "loss": 1.3258, + "step": 754 + }, + { + "epoch": 0.16, + "learning_rate": 3.828639462263403e-05, + "loss": 1.3177, + "step": 755 + }, + { + "epoch": 0.16, + "learning_rate": 3.8280871734554746e-05, + "loss": 1.3312, + "step": 756 + }, + { + "epoch": 0.16, + "learning_rate": 3.827534036040984e-05, + "loss": 1.3159, + "step": 757 + }, + { + "epoch": 0.16, + "learning_rate": 3.8269800502767e-05, + "loss": 1.3111, + "step": 758 + }, + { + "epoch": 0.16, + "learning_rate": 3.8264252164197866e-05, + "loss": 1.2511, + "step": 759 + }, + { + "epoch": 0.16, + "learning_rate": 3.825869534727799e-05, + "loss": 1.3241, + "step": 760 + }, + { + "epoch": 0.16, + "learning_rate": 3.8253130054586886e-05, + "loss": 1.2966, + "step": 761 + }, + { + "epoch": 0.16, + "learning_rate": 3.824755628870797e-05, + "loss": 1.3275, + "step": 762 + }, + { + "epoch": 0.16, + "learning_rate": 3.824197405222863e-05, + "loss": 1.3324, + "step": 763 + }, + { + "epoch": 0.16, + "learning_rate": 3.8236383347740146e-05, + "loss": 1.3197, + "step": 764 + }, + { + "epoch": 0.16, + "learning_rate": 3.823078417783777e-05, + "loss": 1.3572, + "step": 765 + }, + { + "epoch": 0.16, + "learning_rate": 3.8225176545120646e-05, + "loss": 1.3027, + "step": 766 + }, + { + "epoch": 0.16, + "learning_rate": 3.821956045219186e-05, + "loss": 1.3135, + "step": 767 + }, + { + "epoch": 0.16, + "learning_rate": 3.821393590165845e-05, + "loss": 1.3586, + "step": 768 + }, + { + "epoch": 0.16, + "learning_rate": 3.8208302896131344e-05, + "loss": 1.2514, + "step": 769 + }, + { + "epoch": 0.16, + "learning_rate": 3.820266143822541e-05, + "loss": 1.3219, + "step": 770 + }, + { + "epoch": 0.16, + "learning_rate": 3.819701153055944e-05, + "loss": 1.3377, + "step": 771 + }, + { + "epoch": 0.16, + "learning_rate": 3.8191353175756145e-05, + "loss": 1.3107, + "step": 772 + }, + { + "epoch": 0.16, + "learning_rate": 3.818568637644217e-05, + "loss": 1.36, + "step": 773 + }, + { + "epoch": 0.16, + "learning_rate": 3.8180011135248055e-05, + "loss": 1.3692, + "step": 774 + }, + { + "epoch": 0.16, + "learning_rate": 3.8174327454808275e-05, + "loss": 1.3231, + "step": 775 + }, + { + "epoch": 0.16, + "learning_rate": 3.816863533776124e-05, + "loss": 1.3213, + "step": 776 + }, + { + "epoch": 0.16, + "learning_rate": 3.816293478674923e-05, + "loss": 1.3449, + "step": 777 + }, + { + "epoch": 0.16, + "learning_rate": 3.815722580441849e-05, + "loss": 1.3097, + "step": 778 + }, + { + "epoch": 0.16, + "learning_rate": 3.815150839341915e-05, + "loss": 1.3468, + "step": 779 + }, + { + "epoch": 0.16, + "learning_rate": 3.8145782556405244e-05, + "loss": 1.3213, + "step": 780 + }, + { + "epoch": 0.16, + "learning_rate": 3.814004829603475e-05, + "loss": 1.3149, + "step": 781 + }, + { + "epoch": 0.16, + "learning_rate": 3.813430561496953e-05, + "loss": 1.3353, + "step": 782 + }, + { + "epoch": 0.16, + "learning_rate": 3.812855451587537e-05, + "loss": 1.3114, + "step": 783 + }, + { + "epoch": 0.16, + "learning_rate": 3.812279500142194e-05, + "loss": 1.3304, + "step": 784 + }, + { + "epoch": 0.17, + "learning_rate": 3.811702707428285e-05, + "loss": 1.3388, + "step": 785 + }, + { + "epoch": 0.17, + "learning_rate": 3.81112507371356e-05, + "loss": 1.2986, + "step": 786 + }, + { + "epoch": 0.17, + "learning_rate": 3.810546599266158e-05, + "loss": 1.3354, + "step": 787 + }, + { + "epoch": 0.17, + "learning_rate": 3.8099672843546106e-05, + "loss": 1.3486, + "step": 788 + }, + { + "epoch": 0.17, + "learning_rate": 3.809387129247838e-05, + "loss": 1.3586, + "step": 789 + }, + { + "epoch": 0.17, + "learning_rate": 3.808806134215151e-05, + "loss": 1.3622, + "step": 790 + }, + { + "epoch": 0.17, + "learning_rate": 3.80822429952625e-05, + "loss": 1.3256, + "step": 791 + }, + { + "epoch": 0.17, + "learning_rate": 3.8076416254512256e-05, + "loss": 1.3196, + "step": 792 + }, + { + "epoch": 0.17, + "learning_rate": 3.807058112260558e-05, + "loss": 1.2956, + "step": 793 + }, + { + "epoch": 0.17, + "learning_rate": 3.8064737602251155e-05, + "loss": 1.3438, + "step": 794 + }, + { + "epoch": 0.17, + "learning_rate": 3.8058885696161595e-05, + "loss": 1.34, + "step": 795 + }, + { + "epoch": 0.17, + "learning_rate": 3.805302540705335e-05, + "loss": 1.3227, + "step": 796 + }, + { + "epoch": 0.17, + "learning_rate": 3.8047156737646825e-05, + "loss": 1.3718, + "step": 797 + }, + { + "epoch": 0.17, + "learning_rate": 3.8041279690666254e-05, + "loss": 1.2667, + "step": 798 + }, + { + "epoch": 0.17, + "learning_rate": 3.803539426883982e-05, + "loss": 1.302, + "step": 799 + }, + { + "epoch": 0.17, + "learning_rate": 3.8029500474899544e-05, + "loss": 1.341, + "step": 800 + }, + { + "epoch": 0.17, + "learning_rate": 3.802359831158135e-05, + "loss": 1.343, + "step": 801 + }, + { + "epoch": 0.17, + "learning_rate": 3.801768778162506e-05, + "loss": 1.3368, + "step": 802 + }, + { + "epoch": 0.17, + "learning_rate": 3.8011768887774365e-05, + "loss": 1.3268, + "step": 803 + }, + { + "epoch": 0.17, + "learning_rate": 3.800584163277684e-05, + "loss": 1.3112, + "step": 804 + }, + { + "epoch": 0.17, + "learning_rate": 3.7999906019383954e-05, + "loss": 1.3869, + "step": 805 + }, + { + "epoch": 0.17, + "learning_rate": 3.799396205035104e-05, + "loss": 1.3264, + "step": 806 + }, + { + "epoch": 0.17, + "learning_rate": 3.7988009728437304e-05, + "loss": 1.3573, + "step": 807 + }, + { + "epoch": 0.17, + "learning_rate": 3.7982049056405866e-05, + "loss": 1.3474, + "step": 808 + }, + { + "epoch": 0.17, + "learning_rate": 3.797608003702368e-05, + "loss": 1.3319, + "step": 809 + }, + { + "epoch": 0.17, + "learning_rate": 3.79701026730616e-05, + "loss": 1.3339, + "step": 810 + }, + { + "epoch": 0.17, + "learning_rate": 3.796411696729434e-05, + "loss": 1.272, + "step": 811 + }, + { + "epoch": 0.17, + "learning_rate": 3.79581229225005e-05, + "loss": 1.3628, + "step": 812 + }, + { + "epoch": 0.17, + "learning_rate": 3.795212054146254e-05, + "loss": 1.3212, + "step": 813 + }, + { + "epoch": 0.17, + "learning_rate": 3.794610982696679e-05, + "loss": 1.3288, + "step": 814 + }, + { + "epoch": 0.17, + "learning_rate": 3.7940090781803454e-05, + "loss": 1.2961, + "step": 815 + }, + { + "epoch": 0.17, + "learning_rate": 3.7934063408766606e-05, + "loss": 1.3084, + "step": 816 + }, + { + "epoch": 0.17, + "learning_rate": 3.792802771065417e-05, + "loss": 1.328, + "step": 817 + }, + { + "epoch": 0.17, + "learning_rate": 3.792198369026796e-05, + "loss": 1.2938, + "step": 818 + }, + { + "epoch": 0.17, + "learning_rate": 3.791593135041362e-05, + "loss": 1.3381, + "step": 819 + }, + { + "epoch": 0.17, + "learning_rate": 3.790987069390069e-05, + "loss": 1.2808, + "step": 820 + }, + { + "epoch": 0.17, + "learning_rate": 3.790380172354255e-05, + "loss": 1.2853, + "step": 821 + }, + { + "epoch": 0.17, + "learning_rate": 3.789772444215644e-05, + "loss": 1.3335, + "step": 822 + }, + { + "epoch": 0.17, + "learning_rate": 3.7891638852563455e-05, + "loss": 1.3278, + "step": 823 + }, + { + "epoch": 0.17, + "learning_rate": 3.788554495758858e-05, + "loss": 1.2816, + "step": 824 + }, + { + "epoch": 0.17, + "learning_rate": 3.7879442760060604e-05, + "loss": 1.3327, + "step": 825 + }, + { + "epoch": 0.17, + "learning_rate": 3.78733322628122e-05, + "loss": 1.3372, + "step": 826 + }, + { + "epoch": 0.17, + "learning_rate": 3.786721346867991e-05, + "loss": 1.347, + "step": 827 + }, + { + "epoch": 0.17, + "learning_rate": 3.786108638050408e-05, + "loss": 1.3139, + "step": 828 + }, + { + "epoch": 0.17, + "learning_rate": 3.785495100112894e-05, + "loss": 1.3048, + "step": 829 + }, + { + "epoch": 0.17, + "learning_rate": 3.784880733340257e-05, + "loss": 1.3011, + "step": 830 + }, + { + "epoch": 0.17, + "learning_rate": 3.784265538017689e-05, + "loss": 1.3368, + "step": 831 + }, + { + "epoch": 0.18, + "learning_rate": 3.7836495144307644e-05, + "loss": 1.2558, + "step": 832 + }, + { + "epoch": 0.18, + "learning_rate": 3.783032662865447e-05, + "loss": 1.3549, + "step": 833 + }, + { + "epoch": 0.18, + "learning_rate": 3.782414983608081e-05, + "loss": 1.3048, + "step": 834 + }, + { + "epoch": 0.18, + "learning_rate": 3.7817964769453956e-05, + "loss": 1.326, + "step": 835 + }, + { + "epoch": 0.18, + "learning_rate": 3.781177143164505e-05, + "loss": 1.3329, + "step": 836 + }, + { + "epoch": 0.18, + "learning_rate": 3.7805569825529055e-05, + "loss": 1.32, + "step": 837 + }, + { + "epoch": 0.18, + "learning_rate": 3.779935995398481e-05, + "loss": 1.3174, + "step": 838 + }, + { + "epoch": 0.18, + "learning_rate": 3.7793141819894955e-05, + "loss": 1.3625, + "step": 839 + }, + { + "epoch": 0.18, + "learning_rate": 3.778691542614596e-05, + "loss": 1.3777, + "step": 840 + }, + { + "epoch": 0.18, + "learning_rate": 3.778068077562817e-05, + "loss": 1.3298, + "step": 841 + }, + { + "epoch": 0.18, + "learning_rate": 3.7774437871235724e-05, + "loss": 1.3167, + "step": 842 + }, + { + "epoch": 0.18, + "learning_rate": 3.776818671586662e-05, + "loss": 1.2919, + "step": 843 + }, + { + "epoch": 0.18, + "learning_rate": 3.776192731242265e-05, + "loss": 1.3498, + "step": 844 + }, + { + "epoch": 0.18, + "learning_rate": 3.775565966380949e-05, + "loss": 1.3421, + "step": 845 + }, + { + "epoch": 0.18, + "learning_rate": 3.774938377293659e-05, + "loss": 1.3182, + "step": 846 + }, + { + "epoch": 0.18, + "learning_rate": 3.774309964271725e-05, + "loss": 1.3455, + "step": 847 + }, + { + "epoch": 0.18, + "learning_rate": 3.7736807276068604e-05, + "loss": 1.325, + "step": 848 + }, + { + "epoch": 0.18, + "learning_rate": 3.773050667591158e-05, + "loss": 1.338, + "step": 849 + }, + { + "epoch": 0.18, + "learning_rate": 3.772419784517095e-05, + "loss": 1.31, + "step": 850 + }, + { + "epoch": 0.18, + "learning_rate": 3.771788078677532e-05, + "loss": 1.3155, + "step": 851 + }, + { + "epoch": 0.18, + "learning_rate": 3.771155550365708e-05, + "loss": 1.2947, + "step": 852 + }, + { + "epoch": 0.18, + "learning_rate": 3.770522199875247e-05, + "loss": 1.3655, + "step": 853 + }, + { + "epoch": 0.18, + "learning_rate": 3.7698880275001516e-05, + "loss": 1.3295, + "step": 854 + }, + { + "epoch": 0.18, + "learning_rate": 3.769253033534808e-05, + "loss": 1.3509, + "step": 855 + }, + { + "epoch": 0.18, + "learning_rate": 3.7686172182739845e-05, + "loss": 1.3094, + "step": 856 + }, + { + "epoch": 0.18, + "learning_rate": 3.767980582012828e-05, + "loss": 1.2749, + "step": 857 + }, + { + "epoch": 0.18, + "learning_rate": 3.7673431250468695e-05, + "loss": 1.3423, + "step": 858 + }, + { + "epoch": 0.18, + "learning_rate": 3.766704847672018e-05, + "loss": 1.3708, + "step": 859 + }, + { + "epoch": 0.18, + "learning_rate": 3.766065750184566e-05, + "loss": 1.3212, + "step": 860 + }, + { + "epoch": 0.18, + "learning_rate": 3.7654258328811856e-05, + "loss": 1.3406, + "step": 861 + }, + { + "epoch": 0.18, + "learning_rate": 3.764785096058927e-05, + "loss": 1.317, + "step": 862 + }, + { + "epoch": 0.18, + "learning_rate": 3.764143540015227e-05, + "loss": 1.2996, + "step": 863 + }, + { + "epoch": 0.18, + "learning_rate": 3.763501165047896e-05, + "loss": 1.3164, + "step": 864 + }, + { + "epoch": 0.18, + "learning_rate": 3.7628579714551285e-05, + "loss": 1.3208, + "step": 865 + }, + { + "epoch": 0.18, + "learning_rate": 3.7622139595354976e-05, + "loss": 1.3605, + "step": 866 + }, + { + "epoch": 0.18, + "learning_rate": 3.7615691295879574e-05, + "loss": 1.3146, + "step": 867 + }, + { + "epoch": 0.18, + "learning_rate": 3.76092348191184e-05, + "loss": 1.3372, + "step": 868 + }, + { + "epoch": 0.18, + "learning_rate": 3.7602770168068586e-05, + "loss": 1.302, + "step": 869 + }, + { + "epoch": 0.18, + "learning_rate": 3.759629734573105e-05, + "loss": 1.3072, + "step": 870 + }, + { + "epoch": 0.18, + "learning_rate": 3.758981635511051e-05, + "loss": 1.3293, + "step": 871 + }, + { + "epoch": 0.18, + "learning_rate": 3.758332719921547e-05, + "loss": 1.3123, + "step": 872 + }, + { + "epoch": 0.18, + "learning_rate": 3.757682988105823e-05, + "loss": 1.3352, + "step": 873 + }, + { + "epoch": 0.18, + "learning_rate": 3.7570324403654866e-05, + "loss": 1.2952, + "step": 874 + }, + { + "epoch": 0.18, + "learning_rate": 3.756381077002526e-05, + "loss": 1.3695, + "step": 875 + }, + { + "epoch": 0.18, + "learning_rate": 3.755728898319306e-05, + "loss": 1.3138, + "step": 876 + }, + { + "epoch": 0.18, + "learning_rate": 3.7550759046185726e-05, + "loss": 1.4168, + "step": 877 + }, + { + "epoch": 0.18, + "learning_rate": 3.7544220962034475e-05, + "loss": 1.3155, + "step": 878 + }, + { + "epoch": 0.18, + "learning_rate": 3.7537674733774315e-05, + "loss": 1.2633, + "step": 879 + }, + { + "epoch": 0.19, + "learning_rate": 3.753112036444404e-05, + "loss": 1.3867, + "step": 880 + }, + { + "epoch": 0.19, + "learning_rate": 3.752455785708622e-05, + "loss": 1.3262, + "step": 881 + }, + { + "epoch": 0.19, + "learning_rate": 3.7517987214747186e-05, + "loss": 1.3172, + "step": 882 + }, + { + "epoch": 0.19, + "learning_rate": 3.751140844047708e-05, + "loss": 1.3271, + "step": 883 + }, + { + "epoch": 0.19, + "learning_rate": 3.7504821537329795e-05, + "loss": 1.304, + "step": 884 + }, + { + "epoch": 0.19, + "learning_rate": 3.7498226508362996e-05, + "loss": 1.3103, + "step": 885 + }, + { + "epoch": 0.19, + "learning_rate": 3.749162335663813e-05, + "loss": 1.3819, + "step": 886 + }, + { + "epoch": 0.19, + "learning_rate": 3.7485012085220416e-05, + "loss": 1.3204, + "step": 887 + }, + { + "epoch": 0.19, + "learning_rate": 3.747839269717882e-05, + "loss": 1.3505, + "step": 888 + }, + { + "epoch": 0.19, + "learning_rate": 3.7471765195586115e-05, + "loss": 1.3184, + "step": 889 + }, + { + "epoch": 0.19, + "learning_rate": 3.74651295835188e-05, + "loss": 1.3156, + "step": 890 + }, + { + "epoch": 0.19, + "learning_rate": 3.745848586405717e-05, + "loss": 1.311, + "step": 891 + }, + { + "epoch": 0.19, + "learning_rate": 3.745183404028525e-05, + "loss": 1.306, + "step": 892 + }, + { + "epoch": 0.19, + "learning_rate": 3.7445174115290875e-05, + "loss": 1.3066, + "step": 893 + }, + { + "epoch": 0.19, + "learning_rate": 3.74385060921656e-05, + "loss": 1.3126, + "step": 894 + }, + { + "epoch": 0.19, + "learning_rate": 3.743182997400475e-05, + "loss": 1.2545, + "step": 895 + }, + { + "epoch": 0.19, + "learning_rate": 3.742514576390741e-05, + "loss": 1.372, + "step": 896 + }, + { + "epoch": 0.19, + "learning_rate": 3.741845346497643e-05, + "loss": 1.2692, + "step": 897 + }, + { + "epoch": 0.19, + "learning_rate": 3.741175308031839e-05, + "loss": 1.2986, + "step": 898 + }, + { + "epoch": 0.19, + "learning_rate": 3.740504461304366e-05, + "loss": 1.3442, + "step": 899 + }, + { + "epoch": 0.19, + "learning_rate": 3.739832806626632e-05, + "loss": 1.2773, + "step": 900 + }, + { + "epoch": 0.19, + "learning_rate": 3.7391603443104244e-05, + "loss": 1.2644, + "step": 901 + }, + { + "epoch": 0.19, + "learning_rate": 3.738487074667902e-05, + "loss": 1.3268, + "step": 902 + }, + { + "epoch": 0.19, + "learning_rate": 3.7378129980116e-05, + "loss": 1.3084, + "step": 903 + }, + { + "epoch": 0.19, + "learning_rate": 3.7371381146544276e-05, + "loss": 1.3093, + "step": 904 + }, + { + "epoch": 0.19, + "learning_rate": 3.736462424909669e-05, + "loss": 1.2696, + "step": 905 + }, + { + "epoch": 0.19, + "learning_rate": 3.735785929090983e-05, + "loss": 1.2589, + "step": 906 + }, + { + "epoch": 0.19, + "learning_rate": 3.7351086275124023e-05, + "loss": 1.2937, + "step": 907 + }, + { + "epoch": 0.19, + "learning_rate": 3.7344305204883326e-05, + "loss": 1.36, + "step": 908 + }, + { + "epoch": 0.19, + "learning_rate": 3.7337516083335536e-05, + "loss": 1.3503, + "step": 909 + }, + { + "epoch": 0.19, + "learning_rate": 3.7330718913632215e-05, + "loss": 1.3143, + "step": 910 + }, + { + "epoch": 0.19, + "learning_rate": 3.732391369892862e-05, + "loss": 1.2691, + "step": 911 + }, + { + "epoch": 0.19, + "learning_rate": 3.731710044238378e-05, + "loss": 1.3538, + "step": 912 + }, + { + "epoch": 0.19, + "learning_rate": 3.731027914716044e-05, + "loss": 1.2826, + "step": 913 + }, + { + "epoch": 0.19, + "learning_rate": 3.7303449816425066e-05, + "loss": 1.3439, + "step": 914 + }, + { + "epoch": 0.19, + "learning_rate": 3.729661245334787e-05, + "loss": 1.3079, + "step": 915 + }, + { + "epoch": 0.19, + "learning_rate": 3.728976706110278e-05, + "loss": 1.2925, + "step": 916 + }, + { + "epoch": 0.19, + "learning_rate": 3.7282913642867484e-05, + "loss": 1.3079, + "step": 917 + }, + { + "epoch": 0.19, + "learning_rate": 3.727605220182334e-05, + "loss": 1.3148, + "step": 918 + }, + { + "epoch": 0.19, + "learning_rate": 3.726918274115548e-05, + "loss": 1.2858, + "step": 919 + }, + { + "epoch": 0.19, + "learning_rate": 3.726230526405273e-05, + "loss": 1.332, + "step": 920 + }, + { + "epoch": 0.19, + "learning_rate": 3.725541977370765e-05, + "loss": 1.3153, + "step": 921 + }, + { + "epoch": 0.19, + "learning_rate": 3.7248526273316524e-05, + "loss": 1.266, + "step": 922 + }, + { + "epoch": 0.19, + "learning_rate": 3.724162476607933e-05, + "loss": 1.3352, + "step": 923 + }, + { + "epoch": 0.19, + "learning_rate": 3.72347152551998e-05, + "loss": 1.2768, + "step": 924 + }, + { + "epoch": 0.19, + "learning_rate": 3.722779774388535e-05, + "loss": 1.283, + "step": 925 + }, + { + "epoch": 0.19, + "learning_rate": 3.722087223534711e-05, + "loss": 1.2801, + "step": 926 + }, + { + "epoch": 0.19, + "learning_rate": 3.721393873279996e-05, + "loss": 1.3038, + "step": 927 + }, + { + "epoch": 0.2, + "learning_rate": 3.720699723946244e-05, + "loss": 1.3247, + "step": 928 + }, + { + "epoch": 0.2, + "learning_rate": 3.720004775855684e-05, + "loss": 1.2946, + "step": 929 + }, + { + "epoch": 0.2, + "learning_rate": 3.719309029330912e-05, + "loss": 1.2982, + "step": 930 + }, + { + "epoch": 0.2, + "learning_rate": 3.7186124846948995e-05, + "loss": 1.2819, + "step": 931 + }, + { + "epoch": 0.2, + "learning_rate": 3.7179151422709845e-05, + "loss": 1.3325, + "step": 932 + }, + { + "epoch": 0.2, + "learning_rate": 3.717217002382875e-05, + "loss": 1.3033, + "step": 933 + }, + { + "epoch": 0.2, + "learning_rate": 3.716518065354654e-05, + "loss": 1.3273, + "step": 934 + }, + { + "epoch": 0.2, + "learning_rate": 3.715818331510769e-05, + "loss": 1.314, + "step": 935 + }, + { + "epoch": 0.2, + "learning_rate": 3.71511780117604e-05, + "loss": 1.3593, + "step": 936 + }, + { + "epoch": 0.2, + "learning_rate": 3.714416474675657e-05, + "loss": 1.3345, + "step": 937 + }, + { + "epoch": 0.2, + "learning_rate": 3.7137143523351787e-05, + "loss": 1.3185, + "step": 938 + }, + { + "epoch": 0.2, + "learning_rate": 3.713011434480534e-05, + "loss": 1.2981, + "step": 939 + }, + { + "epoch": 0.2, + "learning_rate": 3.71230772143802e-05, + "loss": 1.2702, + "step": 940 + }, + { + "epoch": 0.2, + "learning_rate": 3.711603213534303e-05, + "loss": 1.346, + "step": 941 + }, + { + "epoch": 0.2, + "learning_rate": 3.710897911096421e-05, + "loss": 1.2751, + "step": 942 + }, + { + "epoch": 0.2, + "learning_rate": 3.710191814451777e-05, + "loss": 1.307, + "step": 943 + }, + { + "epoch": 0.2, + "learning_rate": 3.7094849239281444e-05, + "loss": 1.3397, + "step": 944 + }, + { + "epoch": 0.2, + "learning_rate": 3.7087772398536656e-05, + "loss": 1.3408, + "step": 945 + }, + { + "epoch": 0.2, + "learning_rate": 3.70806876255685e-05, + "loss": 1.2815, + "step": 946 + }, + { + "epoch": 0.2, + "learning_rate": 3.7073594923665774e-05, + "loss": 1.3294, + "step": 947 + }, + { + "epoch": 0.2, + "learning_rate": 3.7066494296120935e-05, + "loss": 1.3056, + "step": 948 + }, + { + "epoch": 0.2, + "learning_rate": 3.705938574623012e-05, + "loss": 1.3195, + "step": 949 + }, + { + "epoch": 0.2, + "learning_rate": 3.705226927729317e-05, + "loss": 1.3343, + "step": 950 + }, + { + "epoch": 0.2, + "learning_rate": 3.704514489261357e-05, + "loss": 1.2853, + "step": 951 + }, + { + "epoch": 0.2, + "learning_rate": 3.703801259549848e-05, + "loss": 1.3278, + "step": 952 + }, + { + "epoch": 0.2, + "learning_rate": 3.7030872389258777e-05, + "loss": 1.2837, + "step": 953 + }, + { + "epoch": 0.2, + "learning_rate": 3.702372427720895e-05, + "loss": 1.3047, + "step": 954 + }, + { + "epoch": 0.2, + "learning_rate": 3.701656826266721e-05, + "loss": 1.3364, + "step": 955 + }, + { + "epoch": 0.2, + "learning_rate": 3.7009404348955385e-05, + "loss": 1.3342, + "step": 956 + }, + { + "epoch": 0.2, + "learning_rate": 3.7002232539399014e-05, + "loss": 1.3222, + "step": 957 + }, + { + "epoch": 0.2, + "learning_rate": 3.6995052837327274e-05, + "loss": 1.335, + "step": 958 + }, + { + "epoch": 0.2, + "learning_rate": 3.6987865246073035e-05, + "loss": 1.3284, + "step": 959 + }, + { + "epoch": 0.2, + "learning_rate": 3.6980669768972795e-05, + "loss": 1.3216, + "step": 960 + }, + { + "epoch": 0.2, + "learning_rate": 3.6973466409366735e-05, + "loss": 1.3034, + "step": 961 + }, + { + "epoch": 0.2, + "learning_rate": 3.696625517059868e-05, + "loss": 1.3074, + "step": 962 + }, + { + "epoch": 0.2, + "learning_rate": 3.695903605601612e-05, + "loss": 1.3484, + "step": 963 + }, + { + "epoch": 0.2, + "learning_rate": 3.695180906897021e-05, + "loss": 1.3161, + "step": 964 + }, + { + "epoch": 0.2, + "learning_rate": 3.694457421281575e-05, + "loss": 1.3406, + "step": 965 + }, + { + "epoch": 0.2, + "learning_rate": 3.693733149091119e-05, + "loss": 1.3661, + "step": 966 + }, + { + "epoch": 0.2, + "learning_rate": 3.693008090661864e-05, + "loss": 1.2985, + "step": 967 + }, + { + "epoch": 0.2, + "learning_rate": 3.6922822463303846e-05, + "loss": 1.3351, + "step": 968 + }, + { + "epoch": 0.2, + "learning_rate": 3.691555616433622e-05, + "loss": 1.3145, + "step": 969 + }, + { + "epoch": 0.2, + "learning_rate": 3.69082820130888e-05, + "loss": 1.3249, + "step": 970 + }, + { + "epoch": 0.2, + "learning_rate": 3.69010000129383e-05, + "loss": 1.2538, + "step": 971 + }, + { + "epoch": 0.2, + "learning_rate": 3.689371016726504e-05, + "loss": 1.3523, + "step": 972 + }, + { + "epoch": 0.2, + "learning_rate": 3.6886412479453004e-05, + "loss": 1.3724, + "step": 973 + }, + { + "epoch": 0.2, + "learning_rate": 3.6879106952889826e-05, + "loss": 1.3468, + "step": 974 + }, + { + "epoch": 0.21, + "learning_rate": 3.687179359096675e-05, + "loss": 1.3211, + "step": 975 + }, + { + "epoch": 0.21, + "learning_rate": 3.686447239707868e-05, + "loss": 1.3299, + "step": 976 + }, + { + "epoch": 0.21, + "learning_rate": 3.685714337462415e-05, + "loss": 1.2972, + "step": 977 + }, + { + "epoch": 0.21, + "learning_rate": 3.6849806527005316e-05, + "loss": 1.2821, + "step": 978 + }, + { + "epoch": 0.21, + "learning_rate": 3.6842461857627986e-05, + "loss": 1.3079, + "step": 979 + }, + { + "epoch": 0.21, + "learning_rate": 3.6835109369901586e-05, + "loss": 1.2907, + "step": 980 + }, + { + "epoch": 0.21, + "learning_rate": 3.682774906723918e-05, + "loss": 1.2979, + "step": 981 + }, + { + "epoch": 0.21, + "learning_rate": 3.6820380953057446e-05, + "loss": 1.331, + "step": 982 + }, + { + "epoch": 0.21, + "learning_rate": 3.681300503077671e-05, + "loss": 1.2952, + "step": 983 + }, + { + "epoch": 0.21, + "learning_rate": 3.680562130382089e-05, + "loss": 1.2752, + "step": 984 + }, + { + "epoch": 0.21, + "learning_rate": 3.679822977561756e-05, + "loss": 1.3218, + "step": 985 + }, + { + "epoch": 0.21, + "learning_rate": 3.67908304495979e-05, + "loss": 1.3119, + "step": 986 + }, + { + "epoch": 0.21, + "learning_rate": 3.678342332919671e-05, + "loss": 1.281, + "step": 987 + }, + { + "epoch": 0.21, + "learning_rate": 3.6776008417852415e-05, + "loss": 1.2747, + "step": 988 + }, + { + "epoch": 0.21, + "learning_rate": 3.676858571900704e-05, + "loss": 1.3361, + "step": 989 + }, + { + "epoch": 0.21, + "learning_rate": 3.6761155236106246e-05, + "loss": 1.3624, + "step": 990 + }, + { + "epoch": 0.21, + "learning_rate": 3.67537169725993e-05, + "loss": 1.2927, + "step": 991 + }, + { + "epoch": 0.21, + "learning_rate": 3.6746270931939064e-05, + "loss": 1.3159, + "step": 992 + }, + { + "epoch": 0.21, + "learning_rate": 3.6738817117582045e-05, + "loss": 1.2895, + "step": 993 + }, + { + "epoch": 0.21, + "learning_rate": 3.6731355532988315e-05, + "loss": 1.3099, + "step": 994 + }, + { + "epoch": 0.21, + "learning_rate": 3.6723886181621595e-05, + "loss": 1.3087, + "step": 995 + }, + { + "epoch": 0.21, + "learning_rate": 3.6716409066949184e-05, + "loss": 1.2716, + "step": 996 + }, + { + "epoch": 0.21, + "learning_rate": 3.670892419244199e-05, + "loss": 1.2939, + "step": 997 + }, + { + "epoch": 0.21, + "learning_rate": 3.670143156157454e-05, + "loss": 1.3464, + "step": 998 + }, + { + "epoch": 0.21, + "learning_rate": 3.6693931177824934e-05, + "loss": 1.2778, + "step": 999 + }, + { + "epoch": 0.21, + "learning_rate": 3.66864230446749e-05, + "loss": 1.3137, + "step": 1000 + }, + { + "epoch": 0.21, + "learning_rate": 3.667890716560973e-05, + "loss": 1.308, + "step": 1001 + }, + { + "epoch": 0.21, + "learning_rate": 3.667138354411834e-05, + "loss": 1.3033, + "step": 1002 + }, + { + "epoch": 0.21, + "learning_rate": 3.666385218369324e-05, + "loss": 1.3094, + "step": 1003 + }, + { + "epoch": 0.21, + "learning_rate": 3.6656313087830505e-05, + "loss": 1.3138, + "step": 1004 + }, + { + "epoch": 0.21, + "learning_rate": 3.664876626002982e-05, + "loss": 1.3232, + "step": 1005 + }, + { + "epoch": 0.21, + "learning_rate": 3.6641211703794466e-05, + "loss": 1.331, + "step": 1006 + }, + { + "epoch": 0.21, + "learning_rate": 3.66336494226313e-05, + "loss": 1.322, + "step": 1007 + }, + { + "epoch": 0.21, + "learning_rate": 3.662607942005077e-05, + "loss": 1.3342, + "step": 1008 + }, + { + "epoch": 0.21, + "learning_rate": 3.66185016995669e-05, + "loss": 1.3037, + "step": 1009 + }, + { + "epoch": 0.21, + "learning_rate": 3.661091626469731e-05, + "loss": 1.3338, + "step": 1010 + }, + { + "epoch": 0.21, + "learning_rate": 3.6603323118963194e-05, + "loss": 1.2913, + "step": 1011 + }, + { + "epoch": 0.21, + "learning_rate": 3.659572226588932e-05, + "loss": 1.3209, + "step": 1012 + }, + { + "epoch": 0.21, + "learning_rate": 3.658811370900404e-05, + "loss": 1.2759, + "step": 1013 + }, + { + "epoch": 0.21, + "learning_rate": 3.658049745183928e-05, + "loss": 1.3323, + "step": 1014 + }, + { + "epoch": 0.21, + "learning_rate": 3.657287349793056e-05, + "loss": 1.3535, + "step": 1015 + }, + { + "epoch": 0.21, + "learning_rate": 3.656524185081693e-05, + "loss": 1.2524, + "step": 1016 + }, + { + "epoch": 0.21, + "learning_rate": 3.655760251404105e-05, + "loss": 1.3218, + "step": 1017 + }, + { + "epoch": 0.21, + "learning_rate": 3.654995549114913e-05, + "loss": 1.3505, + "step": 1018 + }, + { + "epoch": 0.21, + "learning_rate": 3.6542300785690954e-05, + "loss": 1.2841, + "step": 1019 + }, + { + "epoch": 0.21, + "learning_rate": 3.6534638401219874e-05, + "loss": 1.3382, + "step": 1020 + }, + { + "epoch": 0.21, + "learning_rate": 3.652696834129281e-05, + "loss": 1.3212, + "step": 1021 + }, + { + "epoch": 0.21, + "learning_rate": 3.6519290609470225e-05, + "loss": 1.3338, + "step": 1022 + }, + { + "epoch": 0.22, + "learning_rate": 3.651160520931617e-05, + "loss": 1.2861, + "step": 1023 + }, + { + "epoch": 0.22, + "learning_rate": 3.650391214439825e-05, + "loss": 1.2802, + "step": 1024 + }, + { + "epoch": 0.22, + "learning_rate": 3.64962114182876e-05, + "loss": 1.2893, + "step": 1025 + }, + { + "epoch": 0.22, + "learning_rate": 3.648850303455895e-05, + "loss": 1.3217, + "step": 1026 + }, + { + "epoch": 0.22, + "learning_rate": 3.6480786996790554e-05, + "loss": 1.3497, + "step": 1027 + }, + { + "epoch": 0.22, + "learning_rate": 3.647306330856425e-05, + "loss": 1.3243, + "step": 1028 + }, + { + "epoch": 0.22, + "learning_rate": 3.646533197346539e-05, + "loss": 1.298, + "step": 1029 + }, + { + "epoch": 0.22, + "learning_rate": 3.6457592995082915e-05, + "loss": 1.2769, + "step": 1030 + }, + { + "epoch": 0.22, + "learning_rate": 3.644984637700928e-05, + "loss": 1.33, + "step": 1031 + }, + { + "epoch": 0.22, + "learning_rate": 3.6442092122840505e-05, + "loss": 1.327, + "step": 1032 + }, + { + "epoch": 0.22, + "learning_rate": 3.643433023617616e-05, + "loss": 1.2911, + "step": 1033 + }, + { + "epoch": 0.22, + "learning_rate": 3.642656072061933e-05, + "loss": 1.2925, + "step": 1034 + }, + { + "epoch": 0.22, + "learning_rate": 3.641878357977668e-05, + "loss": 1.3223, + "step": 1035 + }, + { + "epoch": 0.22, + "learning_rate": 3.641099881725839e-05, + "loss": 1.3262, + "step": 1036 + }, + { + "epoch": 0.22, + "learning_rate": 3.6403206436678173e-05, + "loss": 1.3007, + "step": 1037 + }, + { + "epoch": 0.22, + "learning_rate": 3.63954064416533e-05, + "loss": 1.3166, + "step": 1038 + }, + { + "epoch": 0.22, + "learning_rate": 3.6387598835804555e-05, + "loss": 1.3546, + "step": 1039 + }, + { + "epoch": 0.22, + "learning_rate": 3.6379783622756275e-05, + "loss": 1.3309, + "step": 1040 + }, + { + "epoch": 0.22, + "learning_rate": 3.6371960806136313e-05, + "loss": 1.341, + "step": 1041 + }, + { + "epoch": 0.22, + "learning_rate": 3.636413038957605e-05, + "loss": 1.319, + "step": 1042 + }, + { + "epoch": 0.22, + "learning_rate": 3.635629237671041e-05, + "loss": 1.2956, + "step": 1043 + }, + { + "epoch": 0.22, + "learning_rate": 3.634844677117784e-05, + "loss": 1.3025, + "step": 1044 + }, + { + "epoch": 0.22, + "learning_rate": 3.63405935766203e-05, + "loss": 1.3476, + "step": 1045 + }, + { + "epoch": 0.22, + "learning_rate": 3.633273279668327e-05, + "loss": 1.3185, + "step": 1046 + }, + { + "epoch": 0.22, + "learning_rate": 3.632486443501578e-05, + "loss": 1.311, + "step": 1047 + }, + { + "epoch": 0.22, + "learning_rate": 3.631698849527034e-05, + "loss": 1.3141, + "step": 1048 + }, + { + "epoch": 0.22, + "learning_rate": 3.630910498110302e-05, + "loss": 1.3186, + "step": 1049 + }, + { + "epoch": 0.22, + "learning_rate": 3.630121389617336e-05, + "loss": 1.3305, + "step": 1050 + }, + { + "epoch": 0.22, + "learning_rate": 3.629331524414446e-05, + "loss": 1.3101, + "step": 1051 + }, + { + "epoch": 0.22, + "learning_rate": 3.6285409028682895e-05, + "loss": 1.2645, + "step": 1052 + }, + { + "epoch": 0.22, + "learning_rate": 3.627749525345878e-05, + "loss": 1.3466, + "step": 1053 + }, + { + "epoch": 0.22, + "learning_rate": 3.626957392214571e-05, + "loss": 1.3205, + "step": 1054 + }, + { + "epoch": 0.22, + "learning_rate": 3.626164503842082e-05, + "loss": 1.3256, + "step": 1055 + }, + { + "epoch": 0.22, + "learning_rate": 3.6253708605964724e-05, + "loss": 1.2844, + "step": 1056 + }, + { + "epoch": 0.22, + "learning_rate": 3.6245764628461556e-05, + "loss": 1.3179, + "step": 1057 + }, + { + "epoch": 0.22, + "learning_rate": 3.6237813109598944e-05, + "loss": 1.3443, + "step": 1058 + }, + { + "epoch": 0.22, + "learning_rate": 3.622985405306803e-05, + "loss": 1.2955, + "step": 1059 + }, + { + "epoch": 0.22, + "learning_rate": 3.622188746256343e-05, + "loss": 1.3075, + "step": 1060 + }, + { + "epoch": 0.22, + "learning_rate": 3.621391334178328e-05, + "loss": 1.3125, + "step": 1061 + }, + { + "epoch": 0.22, + "learning_rate": 3.62059316944292e-05, + "loss": 1.2941, + "step": 1062 + }, + { + "epoch": 0.22, + "learning_rate": 3.619794252420632e-05, + "loss": 1.2985, + "step": 1063 + }, + { + "epoch": 0.22, + "learning_rate": 3.618994583482323e-05, + "loss": 1.3151, + "step": 1064 + }, + { + "epoch": 0.22, + "learning_rate": 3.618194162999205e-05, + "loss": 1.296, + "step": 1065 + }, + { + "epoch": 0.22, + "learning_rate": 3.617392991342836e-05, + "loss": 1.3287, + "step": 1066 + }, + { + "epoch": 0.22, + "learning_rate": 3.616591068885123e-05, + "loss": 1.2997, + "step": 1067 + }, + { + "epoch": 0.22, + "learning_rate": 3.6157883959983234e-05, + "loss": 1.2501, + "step": 1068 + }, + { + "epoch": 0.22, + "learning_rate": 3.614984973055041e-05, + "loss": 1.3166, + "step": 1069 + }, + { + "epoch": 0.23, + "learning_rate": 3.614180800428228e-05, + "loss": 1.3497, + "step": 1070 + }, + { + "epoch": 0.23, + "learning_rate": 3.6133758784911864e-05, + "loss": 1.3219, + "step": 1071 + }, + { + "epoch": 0.23, + "learning_rate": 3.6125702076175636e-05, + "loss": 1.3273, + "step": 1072 + }, + { + "epoch": 0.23, + "learning_rate": 3.611763788181356e-05, + "loss": 1.2938, + "step": 1073 + }, + { + "epoch": 0.23, + "learning_rate": 3.610956620556907e-05, + "loss": 1.3263, + "step": 1074 + }, + { + "epoch": 0.23, + "learning_rate": 3.610148705118908e-05, + "loss": 1.3174, + "step": 1075 + }, + { + "epoch": 0.23, + "learning_rate": 3.609340042242397e-05, + "loss": 1.2726, + "step": 1076 + }, + { + "epoch": 0.23, + "learning_rate": 3.6085306323027596e-05, + "loss": 1.3284, + "step": 1077 + }, + { + "epoch": 0.23, + "learning_rate": 3.607720475675727e-05, + "loss": 1.3459, + "step": 1078 + }, + { + "epoch": 0.23, + "learning_rate": 3.606909572737378e-05, + "loss": 1.3363, + "step": 1079 + }, + { + "epoch": 0.23, + "learning_rate": 3.6060979238641363e-05, + "loss": 1.2867, + "step": 1080 + }, + { + "epoch": 0.23, + "learning_rate": 3.6052855294327746e-05, + "loss": 1.3195, + "step": 1081 + }, + { + "epoch": 0.23, + "learning_rate": 3.604472389820409e-05, + "loss": 1.2758, + "step": 1082 + }, + { + "epoch": 0.23, + "learning_rate": 3.6036585054045044e-05, + "loss": 1.3045, + "step": 1083 + }, + { + "epoch": 0.23, + "learning_rate": 3.602843876562868e-05, + "loss": 1.3325, + "step": 1084 + }, + { + "epoch": 0.23, + "learning_rate": 3.6020285036736554e-05, + "loss": 1.2989, + "step": 1085 + }, + { + "epoch": 0.23, + "learning_rate": 3.601212387115366e-05, + "loss": 1.3362, + "step": 1086 + }, + { + "epoch": 0.23, + "learning_rate": 3.6003955272668444e-05, + "loss": 1.314, + "step": 1087 + }, + { + "epoch": 0.23, + "learning_rate": 3.5995779245072816e-05, + "loss": 1.321, + "step": 1088 + }, + { + "epoch": 0.23, + "learning_rate": 3.5987595792162126e-05, + "loss": 1.3526, + "step": 1089 + }, + { + "epoch": 0.23, + "learning_rate": 3.597940491773516e-05, + "loss": 1.2579, + "step": 1090 + }, + { + "epoch": 0.23, + "learning_rate": 3.5971206625594176e-05, + "loss": 1.333, + "step": 1091 + }, + { + "epoch": 0.23, + "learning_rate": 3.5963000919544844e-05, + "loss": 1.3384, + "step": 1092 + }, + { + "epoch": 0.23, + "learning_rate": 3.59547878033963e-05, + "loss": 1.2867, + "step": 1093 + }, + { + "epoch": 0.23, + "learning_rate": 3.594656728096111e-05, + "loss": 1.3241, + "step": 1094 + }, + { + "epoch": 0.23, + "learning_rate": 3.5938339356055274e-05, + "loss": 1.3063, + "step": 1095 + }, + { + "epoch": 0.23, + "learning_rate": 3.593010403249824e-05, + "loss": 1.316, + "step": 1096 + }, + { + "epoch": 0.23, + "learning_rate": 3.592186131411288e-05, + "loss": 1.3018, + "step": 1097 + }, + { + "epoch": 0.23, + "learning_rate": 3.5913611204725496e-05, + "loss": 1.3229, + "step": 1098 + }, + { + "epoch": 0.23, + "learning_rate": 3.590535370816584e-05, + "loss": 1.3447, + "step": 1099 + }, + { + "epoch": 0.23, + "learning_rate": 3.589708882826707e-05, + "loss": 1.3017, + "step": 1100 + }, + { + "epoch": 0.23, + "learning_rate": 3.588881656886578e-05, + "loss": 1.2863, + "step": 1101 + }, + { + "epoch": 0.23, + "learning_rate": 3.5880536933802e-05, + "loss": 1.3163, + "step": 1102 + }, + { + "epoch": 0.23, + "learning_rate": 3.587224992691917e-05, + "loss": 1.3418, + "step": 1103 + }, + { + "epoch": 0.23, + "learning_rate": 3.586395555206417e-05, + "loss": 1.3082, + "step": 1104 + }, + { + "epoch": 0.23, + "learning_rate": 3.585565381308726e-05, + "loss": 1.2337, + "step": 1105 + }, + { + "epoch": 0.23, + "learning_rate": 3.584734471384217e-05, + "loss": 1.2875, + "step": 1106 + }, + { + "epoch": 0.23, + "learning_rate": 3.5839028258186014e-05, + "loss": 1.3158, + "step": 1107 + }, + { + "epoch": 0.23, + "learning_rate": 3.583070444997932e-05, + "loss": 1.3193, + "step": 1108 + }, + { + "epoch": 0.23, + "learning_rate": 3.5822373293086055e-05, + "loss": 1.3294, + "step": 1109 + }, + { + "epoch": 0.23, + "learning_rate": 3.581403479137358e-05, + "loss": 1.2849, + "step": 1110 + }, + { + "epoch": 0.23, + "learning_rate": 3.580568894871265e-05, + "loss": 1.3061, + "step": 1111 + }, + { + "epoch": 0.23, + "learning_rate": 3.579733576897746e-05, + "loss": 1.2782, + "step": 1112 + }, + { + "epoch": 0.23, + "learning_rate": 3.578897525604558e-05, + "loss": 1.3421, + "step": 1113 + }, + { + "epoch": 0.23, + "learning_rate": 3.578060741379801e-05, + "loss": 1.3636, + "step": 1114 + }, + { + "epoch": 0.23, + "learning_rate": 3.577223224611915e-05, + "loss": 1.2847, + "step": 1115 + }, + { + "epoch": 0.23, + "learning_rate": 3.576384975689677e-05, + "loss": 1.2856, + "step": 1116 + }, + { + "epoch": 0.23, + "learning_rate": 3.575545995002207e-05, + "loss": 1.3305, + "step": 1117 + }, + { + "epoch": 0.24, + "learning_rate": 3.574706282938964e-05, + "loss": 1.3576, + "step": 1118 + }, + { + "epoch": 0.24, + "learning_rate": 3.573865839889746e-05, + "loss": 1.3519, + "step": 1119 + }, + { + "epoch": 0.24, + "learning_rate": 3.5730246662446916e-05, + "loss": 1.2934, + "step": 1120 + }, + { + "epoch": 0.24, + "learning_rate": 3.572182762394276e-05, + "loss": 1.3416, + "step": 1121 + }, + { + "epoch": 0.24, + "learning_rate": 3.571340128729315e-05, + "loss": 1.2764, + "step": 1122 + }, + { + "epoch": 0.24, + "learning_rate": 3.570496765640964e-05, + "loss": 1.2825, + "step": 1123 + }, + { + "epoch": 0.24, + "learning_rate": 3.569652673520715e-05, + "loss": 1.3168, + "step": 1124 + }, + { + "epoch": 0.24, + "learning_rate": 3.5688078527604e-05, + "loss": 1.259, + "step": 1125 + }, + { + "epoch": 0.24, + "learning_rate": 3.56796230375219e-05, + "loss": 1.2608, + "step": 1126 + }, + { + "epoch": 0.24, + "learning_rate": 3.567116026888591e-05, + "loss": 1.3269, + "step": 1127 + }, + { + "epoch": 0.24, + "learning_rate": 3.5662690225624484e-05, + "loss": 1.2956, + "step": 1128 + }, + { + "epoch": 0.24, + "learning_rate": 3.565421291166946e-05, + "loss": 1.2896, + "step": 1129 + }, + { + "epoch": 0.24, + "learning_rate": 3.5645728330956074e-05, + "loss": 1.3015, + "step": 1130 + }, + { + "epoch": 0.24, + "learning_rate": 3.563723648742286e-05, + "loss": 1.3056, + "step": 1131 + }, + { + "epoch": 0.24, + "learning_rate": 3.5628737385011814e-05, + "loss": 1.2903, + "step": 1132 + }, + { + "epoch": 0.24, + "learning_rate": 3.562023102766822e-05, + "loss": 1.3046, + "step": 1133 + }, + { + "epoch": 0.24, + "learning_rate": 3.561171741934081e-05, + "loss": 1.3552, + "step": 1134 + }, + { + "epoch": 0.24, + "learning_rate": 3.56031965639816e-05, + "loss": 1.3289, + "step": 1135 + }, + { + "epoch": 0.24, + "learning_rate": 3.559466846554604e-05, + "loss": 1.3383, + "step": 1136 + }, + { + "epoch": 0.24, + "learning_rate": 3.5586133127992904e-05, + "loss": 1.3431, + "step": 1137 + }, + { + "epoch": 0.24, + "learning_rate": 3.557759055528433e-05, + "loss": 1.2967, + "step": 1138 + }, + { + "epoch": 0.24, + "learning_rate": 3.5569040751385825e-05, + "loss": 1.3167, + "step": 1139 + }, + { + "epoch": 0.24, + "learning_rate": 3.556048372026625e-05, + "loss": 1.2515, + "step": 1140 + }, + { + "epoch": 0.24, + "learning_rate": 3.555191946589781e-05, + "loss": 1.3347, + "step": 1141 + }, + { + "epoch": 0.24, + "learning_rate": 3.554334799225608e-05, + "loss": 1.3092, + "step": 1142 + }, + { + "epoch": 0.24, + "learning_rate": 3.553476930331996e-05, + "loss": 1.3033, + "step": 1143 + }, + { + "epoch": 0.24, + "learning_rate": 3.5526183403071754e-05, + "loss": 1.2948, + "step": 1144 + }, + { + "epoch": 0.24, + "learning_rate": 3.551759029549705e-05, + "loss": 1.3345, + "step": 1145 + }, + { + "epoch": 0.24, + "learning_rate": 3.550898998458481e-05, + "loss": 1.2931, + "step": 1146 + }, + { + "epoch": 0.24, + "learning_rate": 3.550038247432734e-05, + "loss": 1.3501, + "step": 1147 + }, + { + "epoch": 0.24, + "learning_rate": 3.549176776872029e-05, + "loss": 1.3781, + "step": 1148 + }, + { + "epoch": 0.24, + "learning_rate": 3.5483145871762646e-05, + "loss": 1.3182, + "step": 1149 + }, + { + "epoch": 0.24, + "learning_rate": 3.547451678745673e-05, + "loss": 1.3186, + "step": 1150 + }, + { + "epoch": 0.24, + "learning_rate": 3.54658805198082e-05, + "loss": 1.3037, + "step": 1151 + }, + { + "epoch": 0.24, + "learning_rate": 3.545723707282606e-05, + "loss": 1.2581, + "step": 1152 + }, + { + "epoch": 0.24, + "learning_rate": 3.5448586450522635e-05, + "loss": 1.3256, + "step": 1153 + }, + { + "epoch": 0.24, + "learning_rate": 3.5439928656913586e-05, + "loss": 1.3381, + "step": 1154 + }, + { + "epoch": 0.24, + "learning_rate": 3.543126369601789e-05, + "loss": 1.2692, + "step": 1155 + }, + { + "epoch": 0.24, + "learning_rate": 3.542259157185787e-05, + "loss": 1.3142, + "step": 1156 + }, + { + "epoch": 0.24, + "learning_rate": 3.5413912288459174e-05, + "loss": 1.3083, + "step": 1157 + }, + { + "epoch": 0.24, + "learning_rate": 3.5405225849850754e-05, + "loss": 1.3383, + "step": 1158 + }, + { + "epoch": 0.24, + "learning_rate": 3.53965322600649e-05, + "loss": 1.2736, + "step": 1159 + }, + { + "epoch": 0.24, + "learning_rate": 3.5387831523137216e-05, + "loss": 1.3476, + "step": 1160 + }, + { + "epoch": 0.24, + "learning_rate": 3.5379123643106625e-05, + "loss": 1.2945, + "step": 1161 + }, + { + "epoch": 0.24, + "learning_rate": 3.5370408624015364e-05, + "loss": 1.3525, + "step": 1162 + }, + { + "epoch": 0.24, + "learning_rate": 3.536168646990899e-05, + "loss": 1.3644, + "step": 1163 + }, + { + "epoch": 0.24, + "learning_rate": 3.535295718483636e-05, + "loss": 1.3243, + "step": 1164 + }, + { + "epoch": 0.25, + "learning_rate": 3.5344220772849654e-05, + "loss": 1.3574, + "step": 1165 + }, + { + "epoch": 0.25, + "learning_rate": 3.533547723800435e-05, + "loss": 1.3056, + "step": 1166 + }, + { + "epoch": 0.25, + "learning_rate": 3.532672658435925e-05, + "loss": 1.2936, + "step": 1167 + }, + { + "epoch": 0.25, + "learning_rate": 3.531796881597643e-05, + "loss": 1.3844, + "step": 1168 + }, + { + "epoch": 0.25, + "learning_rate": 3.53092039369213e-05, + "loss": 1.3335, + "step": 1169 + }, + { + "epoch": 0.25, + "learning_rate": 3.530043195126255e-05, + "loss": 1.2566, + "step": 1170 + }, + { + "epoch": 0.25, + "learning_rate": 3.529165286307219e-05, + "loss": 1.3402, + "step": 1171 + }, + { + "epoch": 0.25, + "learning_rate": 3.528286667642549e-05, + "loss": 1.314, + "step": 1172 + }, + { + "epoch": 0.25, + "learning_rate": 3.527407339540106e-05, + "loss": 1.3547, + "step": 1173 + }, + { + "epoch": 0.25, + "learning_rate": 3.5265273024080776e-05, + "loss": 1.291, + "step": 1174 + }, + { + "epoch": 0.25, + "learning_rate": 3.52564655665498e-05, + "loss": 1.3086, + "step": 1175 + }, + { + "epoch": 0.25, + "learning_rate": 3.524765102689662e-05, + "loss": 1.3185, + "step": 1176 + }, + { + "epoch": 0.25, + "learning_rate": 3.523882940921296e-05, + "loss": 1.3319, + "step": 1177 + }, + { + "epoch": 0.25, + "learning_rate": 3.5230000717593865e-05, + "loss": 1.299, + "step": 1178 + }, + { + "epoch": 0.25, + "learning_rate": 3.522116495613766e-05, + "loss": 1.3019, + "step": 1179 + }, + { + "epoch": 0.25, + "learning_rate": 3.521232212894594e-05, + "loss": 1.2854, + "step": 1180 + }, + { + "epoch": 0.25, + "learning_rate": 3.5203472240123594e-05, + "loss": 1.3305, + "step": 1181 + }, + { + "epoch": 0.25, + "learning_rate": 3.519461529377877e-05, + "loss": 1.308, + "step": 1182 + }, + { + "epoch": 0.25, + "learning_rate": 3.5185751294022914e-05, + "loss": 1.2531, + "step": 1183 + }, + { + "epoch": 0.25, + "learning_rate": 3.5176880244970735e-05, + "loss": 1.2869, + "step": 1184 + }, + { + "epoch": 0.25, + "learning_rate": 3.516800215074021e-05, + "loss": 1.3048, + "step": 1185 + }, + { + "epoch": 0.25, + "learning_rate": 3.515911701545259e-05, + "loss": 1.3049, + "step": 1186 + }, + { + "epoch": 0.25, + "learning_rate": 3.5150224843232405e-05, + "loss": 1.2604, + "step": 1187 + }, + { + "epoch": 0.25, + "learning_rate": 3.514132563820744e-05, + "loss": 1.3201, + "step": 1188 + }, + { + "epoch": 0.25, + "learning_rate": 3.513241940450874e-05, + "loss": 1.3027, + "step": 1189 + }, + { + "epoch": 0.25, + "learning_rate": 3.512350614627062e-05, + "loss": 1.2974, + "step": 1190 + }, + { + "epoch": 0.25, + "learning_rate": 3.511458586763067e-05, + "loss": 1.2961, + "step": 1191 + }, + { + "epoch": 0.25, + "learning_rate": 3.510565857272972e-05, + "loss": 1.2657, + "step": 1192 + }, + { + "epoch": 0.25, + "learning_rate": 3.509672426571185e-05, + "loss": 1.2957, + "step": 1193 + }, + { + "epoch": 0.25, + "learning_rate": 3.508778295072441e-05, + "loss": 1.3012, + "step": 1194 + }, + { + "epoch": 0.25, + "learning_rate": 3.5078834631918014e-05, + "loss": 1.3182, + "step": 1195 + }, + { + "epoch": 0.25, + "learning_rate": 3.506987931344649e-05, + "loss": 1.3301, + "step": 1196 + }, + { + "epoch": 0.25, + "learning_rate": 3.506091699946697e-05, + "loss": 1.2858, + "step": 1197 + }, + { + "epoch": 0.25, + "learning_rate": 3.505194769413977e-05, + "loss": 1.3322, + "step": 1198 + }, + { + "epoch": 0.25, + "learning_rate": 3.504297140162851e-05, + "loss": 1.2556, + "step": 1199 + }, + { + "epoch": 0.25, + "learning_rate": 3.50339881261e-05, + "loss": 1.2901, + "step": 1200 + }, + { + "epoch": 0.25, + "learning_rate": 3.502499787172434e-05, + "loss": 1.3396, + "step": 1201 + }, + { + "epoch": 0.25, + "learning_rate": 3.5016000642674836e-05, + "loss": 1.3205, + "step": 1202 + }, + { + "epoch": 0.25, + "learning_rate": 3.500699644312805e-05, + "loss": 1.3128, + "step": 1203 + }, + { + "epoch": 0.25, + "learning_rate": 3.4997985277263765e-05, + "loss": 1.304, + "step": 1204 + }, + { + "epoch": 0.25, + "learning_rate": 3.498896714926502e-05, + "loss": 1.3031, + "step": 1205 + }, + { + "epoch": 0.25, + "learning_rate": 3.4979942063318066e-05, + "loss": 1.333, + "step": 1206 + }, + { + "epoch": 0.25, + "learning_rate": 3.497091002361238e-05, + "loss": 1.3031, + "step": 1207 + }, + { + "epoch": 0.25, + "learning_rate": 3.496187103434069e-05, + "loss": 1.3285, + "step": 1208 + }, + { + "epoch": 0.25, + "learning_rate": 3.4952825099698926e-05, + "loss": 1.3237, + "step": 1209 + }, + { + "epoch": 0.25, + "learning_rate": 3.4943772223886264e-05, + "loss": 1.3202, + "step": 1210 + }, + { + "epoch": 0.25, + "learning_rate": 3.493471241110507e-05, + "loss": 1.3213, + "step": 1211 + }, + { + "epoch": 0.25, + "learning_rate": 3.492564566556098e-05, + "loss": 1.3025, + "step": 1212 + }, + { + "epoch": 0.26, + "learning_rate": 3.491657199146281e-05, + "loss": 1.314, + "step": 1213 + }, + { + "epoch": 0.26, + "learning_rate": 3.490749139302258e-05, + "loss": 1.3774, + "step": 1214 + }, + { + "epoch": 0.26, + "learning_rate": 3.4898403874455584e-05, + "loss": 1.3192, + "step": 1215 + }, + { + "epoch": 0.26, + "learning_rate": 3.4889309439980256e-05, + "loss": 1.2976, + "step": 1216 + }, + { + "epoch": 0.26, + "learning_rate": 3.488020809381829e-05, + "loss": 1.334, + "step": 1217 + }, + { + "epoch": 0.26, + "learning_rate": 3.4871099840194575e-05, + "loss": 1.29, + "step": 1218 + }, + { + "epoch": 0.26, + "learning_rate": 3.4861984683337205e-05, + "loss": 1.2634, + "step": 1219 + }, + { + "epoch": 0.26, + "learning_rate": 3.485286262747747e-05, + "loss": 1.2728, + "step": 1220 + }, + { + "epoch": 0.26, + "learning_rate": 3.4843733676849876e-05, + "loss": 1.3374, + "step": 1221 + }, + { + "epoch": 0.26, + "learning_rate": 3.4834597835692117e-05, + "loss": 1.2964, + "step": 1222 + }, + { + "epoch": 0.26, + "learning_rate": 3.482545510824511e-05, + "loss": 1.3318, + "step": 1223 + }, + { + "epoch": 0.26, + "learning_rate": 3.481630549875293e-05, + "loss": 1.2775, + "step": 1224 + }, + { + "epoch": 0.26, + "learning_rate": 3.480714901146289e-05, + "loss": 1.335, + "step": 1225 + }, + { + "epoch": 0.26, + "learning_rate": 3.479798565062546e-05, + "loss": 1.2692, + "step": 1226 + }, + { + "epoch": 0.26, + "learning_rate": 3.478881542049432e-05, + "loss": 1.3023, + "step": 1227 + }, + { + "epoch": 0.26, + "learning_rate": 3.4779638325326326e-05, + "loss": 1.3615, + "step": 1228 + }, + { + "epoch": 0.26, + "learning_rate": 3.477045436938154e-05, + "loss": 1.3302, + "step": 1229 + }, + { + "epoch": 0.26, + "learning_rate": 3.476126355692318e-05, + "loss": 1.3058, + "step": 1230 + }, + { + "epoch": 0.26, + "learning_rate": 3.475206589221768e-05, + "loss": 1.2731, + "step": 1231 + }, + { + "epoch": 0.26, + "learning_rate": 3.4742861379534636e-05, + "loss": 1.3252, + "step": 1232 + }, + { + "epoch": 0.26, + "learning_rate": 3.473365002314682e-05, + "loss": 1.3084, + "step": 1233 + }, + { + "epoch": 0.26, + "learning_rate": 3.4724431827330196e-05, + "loss": 1.2977, + "step": 1234 + }, + { + "epoch": 0.26, + "learning_rate": 3.4715206796363876e-05, + "loss": 1.2764, + "step": 1235 + }, + { + "epoch": 0.26, + "learning_rate": 3.470597493453018e-05, + "loss": 1.31, + "step": 1236 + }, + { + "epoch": 0.26, + "learning_rate": 3.469673624611457e-05, + "loss": 1.2913, + "step": 1237 + }, + { + "epoch": 0.26, + "learning_rate": 3.4687490735405696e-05, + "loss": 1.2798, + "step": 1238 + }, + { + "epoch": 0.26, + "learning_rate": 3.467823840669536e-05, + "loss": 1.346, + "step": 1239 + }, + { + "epoch": 0.26, + "learning_rate": 3.466897926427854e-05, + "loss": 1.2654, + "step": 1240 + }, + { + "epoch": 0.26, + "learning_rate": 3.465971331245337e-05, + "loss": 1.302, + "step": 1241 + }, + { + "epoch": 0.26, + "learning_rate": 3.465044055552116e-05, + "loss": 1.2815, + "step": 1242 + }, + { + "epoch": 0.26, + "learning_rate": 3.464116099778634e-05, + "loss": 1.2751, + "step": 1243 + }, + { + "epoch": 0.26, + "learning_rate": 3.463187464355655e-05, + "loss": 1.3094, + "step": 1244 + }, + { + "epoch": 0.26, + "learning_rate": 3.462258149714255e-05, + "loss": 1.3194, + "step": 1245 + }, + { + "epoch": 0.26, + "learning_rate": 3.461328156285826e-05, + "loss": 1.2753, + "step": 1246 + }, + { + "epoch": 0.26, + "learning_rate": 3.4603974845020754e-05, + "loss": 1.3322, + "step": 1247 + }, + { + "epoch": 0.26, + "learning_rate": 3.4594661347950255e-05, + "loss": 1.372, + "step": 1248 + }, + { + "epoch": 0.26, + "learning_rate": 3.458534107597013e-05, + "loss": 1.311, + "step": 1249 + }, + { + "epoch": 0.26, + "learning_rate": 3.457601403340689e-05, + "loss": 1.3296, + "step": 1250 + }, + { + "epoch": 0.26, + "learning_rate": 3.45666802245902e-05, + "loss": 1.317, + "step": 1251 + }, + { + "epoch": 0.26, + "learning_rate": 3.455733965385284e-05, + "loss": 1.2959, + "step": 1252 + }, + { + "epoch": 0.26, + "learning_rate": 3.454799232553077e-05, + "loss": 1.3007, + "step": 1253 + }, + { + "epoch": 0.26, + "learning_rate": 3.4538638243963045e-05, + "loss": 1.2638, + "step": 1254 + }, + { + "epoch": 0.26, + "learning_rate": 3.4529277413491885e-05, + "loss": 1.3071, + "step": 1255 + }, + { + "epoch": 0.26, + "learning_rate": 3.451990983846262e-05, + "loss": 1.3229, + "step": 1256 + }, + { + "epoch": 0.26, + "learning_rate": 3.451053552322373e-05, + "loss": 1.3249, + "step": 1257 + }, + { + "epoch": 0.26, + "learning_rate": 3.4501154472126815e-05, + "loss": 1.3593, + "step": 1258 + }, + { + "epoch": 0.26, + "learning_rate": 3.4491766689526596e-05, + "loss": 1.2989, + "step": 1259 + }, + { + "epoch": 0.27, + "learning_rate": 3.448237217978093e-05, + "loss": 1.2947, + "step": 1260 + }, + { + "epoch": 0.27, + "learning_rate": 3.4472970947250794e-05, + "loss": 1.2887, + "step": 1261 + }, + { + "epoch": 0.27, + "learning_rate": 3.446356299630028e-05, + "loss": 1.341, + "step": 1262 + }, + { + "epoch": 0.27, + "learning_rate": 3.4454148331296606e-05, + "loss": 1.3018, + "step": 1263 + }, + { + "epoch": 0.27, + "learning_rate": 3.44447269566101e-05, + "loss": 1.3284, + "step": 1264 + }, + { + "epoch": 0.27, + "learning_rate": 3.443529887661421e-05, + "loss": 1.3196, + "step": 1265 + }, + { + "epoch": 0.27, + "learning_rate": 3.44258640956855e-05, + "loss": 1.3603, + "step": 1266 + }, + { + "epoch": 0.27, + "learning_rate": 3.441642261820363e-05, + "loss": 1.2977, + "step": 1267 + }, + { + "epoch": 0.27, + "learning_rate": 3.440697444855137e-05, + "loss": 1.3293, + "step": 1268 + }, + { + "epoch": 0.27, + "learning_rate": 3.439751959111463e-05, + "loss": 1.3461, + "step": 1269 + }, + { + "epoch": 0.27, + "learning_rate": 3.438805805028238e-05, + "loss": 1.3195, + "step": 1270 + }, + { + "epoch": 0.27, + "learning_rate": 3.4378589830446714e-05, + "loss": 1.2863, + "step": 1271 + }, + { + "epoch": 0.27, + "learning_rate": 3.436911493600282e-05, + "loss": 1.2776, + "step": 1272 + }, + { + "epoch": 0.27, + "learning_rate": 3.4359633371349e-05, + "loss": 1.3062, + "step": 1273 + }, + { + "epoch": 0.27, + "learning_rate": 3.435014514088662e-05, + "loss": 1.2985, + "step": 1274 + }, + { + "epoch": 0.27, + "learning_rate": 3.434065024902019e-05, + "loss": 1.2872, + "step": 1275 + }, + { + "epoch": 0.27, + "learning_rate": 3.4331148700157263e-05, + "loss": 1.2782, + "step": 1276 + }, + { + "epoch": 0.27, + "learning_rate": 3.43216404987085e-05, + "loss": 1.3008, + "step": 1277 + }, + { + "epoch": 0.27, + "learning_rate": 3.4312125649087664e-05, + "loss": 1.2953, + "step": 1278 + }, + { + "epoch": 0.27, + "learning_rate": 3.430260415571158e-05, + "loss": 1.3191, + "step": 1279 + }, + { + "epoch": 0.27, + "learning_rate": 3.429307602300019e-05, + "loss": 1.2763, + "step": 1280 + }, + { + "epoch": 0.27, + "learning_rate": 3.4283541255376466e-05, + "loss": 1.2801, + "step": 1281 + }, + { + "epoch": 0.27, + "learning_rate": 3.427399985726652e-05, + "loss": 1.3344, + "step": 1282 + }, + { + "epoch": 0.27, + "learning_rate": 3.42644518330995e-05, + "loss": 1.2885, + "step": 1283 + }, + { + "epoch": 0.27, + "learning_rate": 3.425489718730765e-05, + "loss": 1.3324, + "step": 1284 + }, + { + "epoch": 0.27, + "learning_rate": 3.4245335924326274e-05, + "loss": 1.3043, + "step": 1285 + }, + { + "epoch": 0.27, + "learning_rate": 3.423576804859375e-05, + "loss": 1.2856, + "step": 1286 + }, + { + "epoch": 0.27, + "learning_rate": 3.422619356455154e-05, + "loss": 1.3455, + "step": 1287 + }, + { + "epoch": 0.27, + "learning_rate": 3.421661247664417e-05, + "loss": 1.3051, + "step": 1288 + }, + { + "epoch": 0.27, + "learning_rate": 3.420702478931921e-05, + "loss": 1.3006, + "step": 1289 + }, + { + "epoch": 0.27, + "learning_rate": 3.419743050702732e-05, + "loss": 1.2823, + "step": 1290 + }, + { + "epoch": 0.27, + "learning_rate": 3.4187829634222205e-05, + "loss": 1.3047, + "step": 1291 + }, + { + "epoch": 0.27, + "learning_rate": 3.417822217536064e-05, + "loss": 1.3242, + "step": 1292 + }, + { + "epoch": 0.27, + "learning_rate": 3.4168608134902443e-05, + "loss": 1.2822, + "step": 1293 + }, + { + "epoch": 0.27, + "learning_rate": 3.4158987517310506e-05, + "loss": 1.2905, + "step": 1294 + }, + { + "epoch": 0.27, + "learning_rate": 3.414936032705075e-05, + "loss": 1.3127, + "step": 1295 + }, + { + "epoch": 0.27, + "learning_rate": 3.413972656859218e-05, + "loss": 1.3153, + "step": 1296 + }, + { + "epoch": 0.27, + "learning_rate": 3.413008624640683e-05, + "loss": 1.3078, + "step": 1297 + }, + { + "epoch": 0.27, + "learning_rate": 3.4120439364969766e-05, + "loss": 1.2792, + "step": 1298 + }, + { + "epoch": 0.27, + "learning_rate": 3.411078592875912e-05, + "loss": 1.3039, + "step": 1299 + }, + { + "epoch": 0.27, + "learning_rate": 3.410112594225607e-05, + "loss": 1.2918, + "step": 1300 + }, + { + "epoch": 0.27, + "learning_rate": 3.4091459409944836e-05, + "loss": 1.2701, + "step": 1301 + }, + { + "epoch": 0.27, + "learning_rate": 3.408178633631265e-05, + "loss": 1.2595, + "step": 1302 + }, + { + "epoch": 0.27, + "learning_rate": 3.407210672584979e-05, + "loss": 1.3252, + "step": 1303 + }, + { + "epoch": 0.27, + "learning_rate": 3.40624205830496e-05, + "loss": 1.2767, + "step": 1304 + }, + { + "epoch": 0.27, + "learning_rate": 3.4052727912408414e-05, + "loss": 1.3054, + "step": 1305 + }, + { + "epoch": 0.27, + "learning_rate": 3.404302871842563e-05, + "loss": 1.3169, + "step": 1306 + }, + { + "epoch": 0.27, + "learning_rate": 3.403332300560364e-05, + "loss": 1.2939, + "step": 1307 + }, + { + "epoch": 0.28, + "learning_rate": 3.4023610778447895e-05, + "loss": 1.3022, + "step": 1308 + }, + { + "epoch": 0.28, + "learning_rate": 3.401389204146685e-05, + "loss": 1.3017, + "step": 1309 + }, + { + "epoch": 0.28, + "learning_rate": 3.4004166799172004e-05, + "loss": 1.3318, + "step": 1310 + }, + { + "epoch": 0.28, + "learning_rate": 3.3994435056077827e-05, + "loss": 1.3287, + "step": 1311 + }, + { + "epoch": 0.28, + "learning_rate": 3.3984696816701865e-05, + "loss": 1.2773, + "step": 1312 + }, + { + "epoch": 0.28, + "learning_rate": 3.397495208556465e-05, + "loss": 1.3318, + "step": 1313 + }, + { + "epoch": 0.28, + "learning_rate": 3.3965200867189734e-05, + "loss": 1.3425, + "step": 1314 + }, + { + "epoch": 0.28, + "learning_rate": 3.395544316610367e-05, + "loss": 1.3127, + "step": 1315 + }, + { + "epoch": 0.28, + "learning_rate": 3.394567898683602e-05, + "loss": 1.2811, + "step": 1316 + }, + { + "epoch": 0.28, + "learning_rate": 3.3935908333919385e-05, + "loss": 1.3013, + "step": 1317 + }, + { + "epoch": 0.28, + "learning_rate": 3.3926131211889336e-05, + "loss": 1.2883, + "step": 1318 + }, + { + "epoch": 0.28, + "learning_rate": 3.391634762528445e-05, + "loss": 1.2957, + "step": 1319 + }, + { + "epoch": 0.28, + "learning_rate": 3.3906557578646317e-05, + "loss": 1.2667, + "step": 1320 + }, + { + "epoch": 0.28, + "learning_rate": 3.389676107651953e-05, + "loss": 1.3282, + "step": 1321 + }, + { + "epoch": 0.28, + "learning_rate": 3.388695812345168e-05, + "loss": 1.3189, + "step": 1322 + }, + { + "epoch": 0.28, + "learning_rate": 3.3877148723993306e-05, + "loss": 1.3242, + "step": 1323 + }, + { + "epoch": 0.28, + "learning_rate": 3.3867332882698016e-05, + "loss": 1.2828, + "step": 1324 + }, + { + "epoch": 0.28, + "learning_rate": 3.385751060412235e-05, + "loss": 1.2589, + "step": 1325 + }, + { + "epoch": 0.28, + "learning_rate": 3.3847681892825865e-05, + "loss": 1.287, + "step": 1326 + }, + { + "epoch": 0.28, + "learning_rate": 3.383784675337108e-05, + "loss": 1.3118, + "step": 1327 + }, + { + "epoch": 0.28, + "learning_rate": 3.382800519032353e-05, + "loss": 1.3099, + "step": 1328 + }, + { + "epoch": 0.28, + "learning_rate": 3.38181572082517e-05, + "loss": 1.2927, + "step": 1329 + }, + { + "epoch": 0.28, + "learning_rate": 3.3808302811727074e-05, + "loss": 1.3071, + "step": 1330 + }, + { + "epoch": 0.28, + "learning_rate": 3.379844200532411e-05, + "loss": 1.3211, + "step": 1331 + }, + { + "epoch": 0.28, + "learning_rate": 3.378857479362024e-05, + "loss": 1.3449, + "step": 1332 + }, + { + "epoch": 0.28, + "learning_rate": 3.377870118119587e-05, + "loss": 1.2573, + "step": 1333 + }, + { + "epoch": 0.28, + "learning_rate": 3.376882117263437e-05, + "loss": 1.3084, + "step": 1334 + }, + { + "epoch": 0.28, + "learning_rate": 3.37589347725221e-05, + "loss": 1.2562, + "step": 1335 + }, + { + "epoch": 0.28, + "learning_rate": 3.374904198544836e-05, + "loss": 1.3295, + "step": 1336 + }, + { + "epoch": 0.28, + "learning_rate": 3.373914281600544e-05, + "loss": 1.3231, + "step": 1337 + }, + { + "epoch": 0.28, + "learning_rate": 3.372923726878856e-05, + "loss": 1.3315, + "step": 1338 + }, + { + "epoch": 0.28, + "learning_rate": 3.371932534839594e-05, + "loss": 1.3093, + "step": 1339 + }, + { + "epoch": 0.28, + "learning_rate": 3.370940705942874e-05, + "loss": 1.2672, + "step": 1340 + }, + { + "epoch": 0.28, + "learning_rate": 3.369948240649106e-05, + "loss": 1.3012, + "step": 1341 + }, + { + "epoch": 0.28, + "learning_rate": 3.368955139418998e-05, + "loss": 1.3134, + "step": 1342 + }, + { + "epoch": 0.28, + "learning_rate": 3.367961402713553e-05, + "loss": 1.3119, + "step": 1343 + }, + { + "epoch": 0.28, + "learning_rate": 3.3669670309940663e-05, + "loss": 1.2915, + "step": 1344 + }, + { + "epoch": 0.28, + "learning_rate": 3.365972024722131e-05, + "loss": 1.2518, + "step": 1345 + }, + { + "epoch": 0.28, + "learning_rate": 3.3649763843596334e-05, + "loss": 1.2958, + "step": 1346 + }, + { + "epoch": 0.28, + "learning_rate": 3.363980110368755e-05, + "loss": 1.2964, + "step": 1347 + }, + { + "epoch": 0.28, + "learning_rate": 3.36298320321197e-05, + "loss": 1.333, + "step": 1348 + }, + { + "epoch": 0.28, + "learning_rate": 3.361985663352048e-05, + "loss": 1.3069, + "step": 1349 + }, + { + "epoch": 0.28, + "learning_rate": 3.360987491252051e-05, + "loss": 1.2551, + "step": 1350 + }, + { + "epoch": 0.28, + "learning_rate": 3.359988687375336e-05, + "loss": 1.2971, + "step": 1351 + }, + { + "epoch": 0.28, + "learning_rate": 3.3589892521855515e-05, + "loss": 1.3038, + "step": 1352 + }, + { + "epoch": 0.28, + "learning_rate": 3.35798918614664e-05, + "loss": 1.3001, + "step": 1353 + }, + { + "epoch": 0.28, + "learning_rate": 3.356988489722837e-05, + "loss": 1.3387, + "step": 1354 + }, + { + "epoch": 0.29, + "learning_rate": 3.355987163378671e-05, + "loss": 1.2484, + "step": 1355 + }, + { + "epoch": 0.29, + "learning_rate": 3.354985207578961e-05, + "loss": 1.2616, + "step": 1356 + }, + { + "epoch": 0.29, + "learning_rate": 3.3539826227888216e-05, + "loss": 1.3249, + "step": 1357 + }, + { + "epoch": 0.29, + "learning_rate": 3.352979409473656e-05, + "loss": 1.2968, + "step": 1358 + }, + { + "epoch": 0.29, + "learning_rate": 3.351975568099159e-05, + "loss": 1.3149, + "step": 1359 + }, + { + "epoch": 0.29, + "learning_rate": 3.350971099131322e-05, + "loss": 1.362, + "step": 1360 + }, + { + "epoch": 0.29, + "learning_rate": 3.349966003036421e-05, + "loss": 1.2973, + "step": 1361 + }, + { + "epoch": 0.29, + "learning_rate": 3.3489602802810276e-05, + "loss": 1.2954, + "step": 1362 + }, + { + "epoch": 0.29, + "learning_rate": 3.347953931332004e-05, + "loss": 1.2829, + "step": 1363 + }, + { + "epoch": 0.29, + "learning_rate": 3.3469469566565e-05, + "loss": 1.2816, + "step": 1364 + }, + { + "epoch": 0.29, + "learning_rate": 3.345939356721959e-05, + "loss": 1.3187, + "step": 1365 + }, + { + "epoch": 0.29, + "learning_rate": 3.3449311319961134e-05, + "loss": 1.2707, + "step": 1366 + }, + { + "epoch": 0.29, + "learning_rate": 3.343922282946985e-05, + "loss": 1.3621, + "step": 1367 + }, + { + "epoch": 0.29, + "learning_rate": 3.342912810042888e-05, + "loss": 1.3427, + "step": 1368 + }, + { + "epoch": 0.29, + "learning_rate": 3.3419027137524236e-05, + "loss": 1.271, + "step": 1369 + }, + { + "epoch": 0.29, + "learning_rate": 3.340891994544483e-05, + "loss": 1.2508, + "step": 1370 + }, + { + "epoch": 0.29, + "learning_rate": 3.339880652888246e-05, + "loss": 1.2675, + "step": 1371 + }, + { + "epoch": 0.29, + "learning_rate": 3.338868689253183e-05, + "loss": 1.286, + "step": 1372 + }, + { + "epoch": 0.29, + "learning_rate": 3.337856104109053e-05, + "loss": 1.2766, + "step": 1373 + }, + { + "epoch": 0.29, + "learning_rate": 3.3368428979259006e-05, + "loss": 1.2772, + "step": 1374 + }, + { + "epoch": 0.29, + "learning_rate": 3.335829071174063e-05, + "loss": 1.3164, + "step": 1375 + }, + { + "epoch": 0.29, + "learning_rate": 3.334814624324163e-05, + "loss": 1.2738, + "step": 1376 + }, + { + "epoch": 0.29, + "learning_rate": 3.333799557847109e-05, + "loss": 1.3249, + "step": 1377 + }, + { + "epoch": 0.29, + "learning_rate": 3.332783872214103e-05, + "loss": 1.3157, + "step": 1378 + }, + { + "epoch": 0.29, + "learning_rate": 3.331767567896629e-05, + "loss": 1.2825, + "step": 1379 + }, + { + "epoch": 0.29, + "learning_rate": 3.330750645366461e-05, + "loss": 1.2947, + "step": 1380 + }, + { + "epoch": 0.29, + "learning_rate": 3.3297331050956576e-05, + "loss": 1.3257, + "step": 1381 + }, + { + "epoch": 0.29, + "learning_rate": 3.328714947556568e-05, + "loss": 1.2804, + "step": 1382 + }, + { + "epoch": 0.29, + "learning_rate": 3.327696173221824e-05, + "loss": 1.2964, + "step": 1383 + }, + { + "epoch": 0.29, + "learning_rate": 3.326676782564347e-05, + "loss": 1.3306, + "step": 1384 + }, + { + "epoch": 0.29, + "learning_rate": 3.325656776057341e-05, + "loss": 1.2447, + "step": 1385 + }, + { + "epoch": 0.29, + "learning_rate": 3.324636154174299e-05, + "loss": 1.3169, + "step": 1386 + }, + { + "epoch": 0.29, + "learning_rate": 3.3236149173889975e-05, + "loss": 1.2883, + "step": 1387 + }, + { + "epoch": 0.29, + "learning_rate": 3.3225930661755005e-05, + "loss": 1.3379, + "step": 1388 + }, + { + "epoch": 0.29, + "learning_rate": 3.321570601008155e-05, + "loss": 1.3591, + "step": 1389 + }, + { + "epoch": 0.29, + "learning_rate": 3.320547522361595e-05, + "loss": 1.3079, + "step": 1390 + }, + { + "epoch": 0.29, + "learning_rate": 3.3195238307107375e-05, + "loss": 1.3126, + "step": 1391 + }, + { + "epoch": 0.29, + "learning_rate": 3.318499526530786e-05, + "loss": 1.3291, + "step": 1392 + }, + { + "epoch": 0.29, + "learning_rate": 3.317474610297226e-05, + "loss": 1.3011, + "step": 1393 + }, + { + "epoch": 0.29, + "learning_rate": 3.316449082485829e-05, + "loss": 1.302, + "step": 1394 + }, + { + "epoch": 0.29, + "learning_rate": 3.31542294357265e-05, + "loss": 1.3377, + "step": 1395 + }, + { + "epoch": 0.29, + "learning_rate": 3.3143961940340274e-05, + "loss": 1.2829, + "step": 1396 + }, + { + "epoch": 0.29, + "learning_rate": 3.313368834346583e-05, + "loss": 1.2995, + "step": 1397 + }, + { + "epoch": 0.29, + "learning_rate": 3.312340864987221e-05, + "loss": 1.3037, + "step": 1398 + }, + { + "epoch": 0.29, + "learning_rate": 3.311312286433131e-05, + "loss": 1.3276, + "step": 1399 + }, + { + "epoch": 0.29, + "learning_rate": 3.310283099161783e-05, + "loss": 1.3256, + "step": 1400 + }, + { + "epoch": 0.29, + "learning_rate": 3.309253303650932e-05, + "loss": 1.3359, + "step": 1401 + }, + { + "epoch": 0.29, + "learning_rate": 3.3082229003786114e-05, + "loss": 1.3255, + "step": 1402 + }, + { + "epoch": 0.3, + "learning_rate": 3.3071918898231413e-05, + "loss": 1.2528, + "step": 1403 + }, + { + "epoch": 0.3, + "learning_rate": 3.3061602724631205e-05, + "loss": 1.3548, + "step": 1404 + }, + { + "epoch": 0.3, + "learning_rate": 3.3051280487774316e-05, + "loss": 1.2952, + "step": 1405 + }, + { + "epoch": 0.3, + "learning_rate": 3.304095219245236e-05, + "loss": 1.3009, + "step": 1406 + }, + { + "epoch": 0.3, + "learning_rate": 3.303061784345979e-05, + "loss": 1.3164, + "step": 1407 + }, + { + "epoch": 0.3, + "learning_rate": 3.302027744559387e-05, + "loss": 1.2863, + "step": 1408 + }, + { + "epoch": 0.3, + "learning_rate": 3.300993100365463e-05, + "loss": 1.2838, + "step": 1409 + }, + { + "epoch": 0.3, + "learning_rate": 3.299957852244496e-05, + "loss": 1.3443, + "step": 1410 + }, + { + "epoch": 0.3, + "learning_rate": 3.298922000677053e-05, + "loss": 1.278, + "step": 1411 + }, + { + "epoch": 0.3, + "learning_rate": 3.2978855461439806e-05, + "loss": 1.2888, + "step": 1412 + }, + { + "epoch": 0.3, + "learning_rate": 3.296848489126406e-05, + "loss": 1.2988, + "step": 1413 + }, + { + "epoch": 0.3, + "learning_rate": 3.295810830105736e-05, + "loss": 1.276, + "step": 1414 + }, + { + "epoch": 0.3, + "learning_rate": 3.294772569563656e-05, + "loss": 1.2776, + "step": 1415 + }, + { + "epoch": 0.3, + "learning_rate": 3.293733707982132e-05, + "loss": 1.2864, + "step": 1416 + }, + { + "epoch": 0.3, + "learning_rate": 3.292694245843407e-05, + "loss": 1.2906, + "step": 1417 + }, + { + "epoch": 0.3, + "learning_rate": 3.2916541836300065e-05, + "loss": 1.3245, + "step": 1418 + }, + { + "epoch": 0.3, + "learning_rate": 3.290613521824731e-05, + "loss": 1.3229, + "step": 1419 + }, + { + "epoch": 0.3, + "learning_rate": 3.28957226091066e-05, + "loss": 1.2939, + "step": 1420 + }, + { + "epoch": 0.3, + "learning_rate": 3.2885304013711525e-05, + "loss": 1.2817, + "step": 1421 + }, + { + "epoch": 0.3, + "learning_rate": 3.2874879436898444e-05, + "loss": 1.2686, + "step": 1422 + }, + { + "epoch": 0.3, + "learning_rate": 3.286444888350649e-05, + "loss": 1.3233, + "step": 1423 + }, + { + "epoch": 0.3, + "learning_rate": 3.285401235837758e-05, + "loss": 1.2612, + "step": 1424 + }, + { + "epoch": 0.3, + "learning_rate": 3.28435698663564e-05, + "loss": 1.3166, + "step": 1425 + }, + { + "epoch": 0.3, + "learning_rate": 3.283312141229039e-05, + "loss": 1.3319, + "step": 1426 + }, + { + "epoch": 0.3, + "learning_rate": 3.282266700102978e-05, + "loss": 1.2886, + "step": 1427 + }, + { + "epoch": 0.3, + "learning_rate": 3.281220663742756e-05, + "loss": 1.237, + "step": 1428 + }, + { + "epoch": 0.3, + "learning_rate": 3.280174032633947e-05, + "loss": 1.3069, + "step": 1429 + }, + { + "epoch": 0.3, + "learning_rate": 3.279126807262403e-05, + "loss": 1.2735, + "step": 1430 + }, + { + "epoch": 0.3, + "learning_rate": 3.27807898811425e-05, + "loss": 1.2644, + "step": 1431 + }, + { + "epoch": 0.3, + "learning_rate": 3.277030575675891e-05, + "loss": 1.2884, + "step": 1432 + }, + { + "epoch": 0.3, + "learning_rate": 3.2759815704340034e-05, + "loss": 1.2894, + "step": 1433 + }, + { + "epoch": 0.3, + "learning_rate": 3.2749319728755415e-05, + "loss": 1.2695, + "step": 1434 + }, + { + "epoch": 0.3, + "learning_rate": 3.273881783487732e-05, + "loss": 1.3131, + "step": 1435 + }, + { + "epoch": 0.3, + "learning_rate": 3.2728310027580786e-05, + "loss": 1.3413, + "step": 1436 + }, + { + "epoch": 0.3, + "learning_rate": 3.271779631174358e-05, + "loss": 1.2963, + "step": 1437 + }, + { + "epoch": 0.3, + "learning_rate": 3.270727669224622e-05, + "loss": 1.3176, + "step": 1438 + }, + { + "epoch": 0.3, + "learning_rate": 3.269675117397196e-05, + "loss": 1.3155, + "step": 1439 + }, + { + "epoch": 0.3, + "learning_rate": 3.268621976180681e-05, + "loss": 1.2833, + "step": 1440 + }, + { + "epoch": 0.3, + "learning_rate": 3.267568246063948e-05, + "loss": 1.2208, + "step": 1441 + }, + { + "epoch": 0.3, + "learning_rate": 3.2665139275361446e-05, + "loss": 1.3166, + "step": 1442 + }, + { + "epoch": 0.3, + "learning_rate": 3.26545902108669e-05, + "loss": 1.2933, + "step": 1443 + }, + { + "epoch": 0.3, + "learning_rate": 3.2644035272052756e-05, + "loss": 1.3392, + "step": 1444 + }, + { + "epoch": 0.3, + "learning_rate": 3.263347446381869e-05, + "loss": 1.2914, + "step": 1445 + }, + { + "epoch": 0.3, + "learning_rate": 3.2622907791067056e-05, + "loss": 1.2961, + "step": 1446 + }, + { + "epoch": 0.3, + "learning_rate": 3.261233525870296e-05, + "loss": 1.3064, + "step": 1447 + }, + { + "epoch": 0.3, + "learning_rate": 3.260175687163423e-05, + "loss": 1.3228, + "step": 1448 + }, + { + "epoch": 0.3, + "learning_rate": 3.259117263477138e-05, + "loss": 1.2475, + "step": 1449 + }, + { + "epoch": 0.3, + "learning_rate": 3.2580582553027684e-05, + "loss": 1.2894, + "step": 1450 + }, + { + "epoch": 0.31, + "learning_rate": 3.2569986631319104e-05, + "loss": 1.2327, + "step": 1451 + }, + { + "epoch": 0.31, + "learning_rate": 3.2559384874564305e-05, + "loss": 1.2908, + "step": 1452 + }, + { + "epoch": 0.31, + "learning_rate": 3.254877728768468e-05, + "loss": 1.2182, + "step": 1453 + }, + { + "epoch": 0.31, + "learning_rate": 3.2538163875604316e-05, + "loss": 1.2867, + "step": 1454 + }, + { + "epoch": 0.31, + "learning_rate": 3.252754464325001e-05, + "loss": 1.2964, + "step": 1455 + }, + { + "epoch": 0.31, + "learning_rate": 3.251691959555126e-05, + "loss": 1.299, + "step": 1456 + }, + { + "epoch": 0.31, + "learning_rate": 3.2506288737440265e-05, + "loss": 1.2994, + "step": 1457 + }, + { + "epoch": 0.31, + "learning_rate": 3.249565207385192e-05, + "loss": 1.3188, + "step": 1458 + }, + { + "epoch": 0.31, + "learning_rate": 3.24850096097238e-05, + "loss": 1.2994, + "step": 1459 + }, + { + "epoch": 0.31, + "learning_rate": 3.2474361349996205e-05, + "loss": 1.3603, + "step": 1460 + }, + { + "epoch": 0.31, + "learning_rate": 3.2463707299612086e-05, + "loss": 1.3068, + "step": 1461 + }, + { + "epoch": 0.31, + "learning_rate": 3.245304746351712e-05, + "loss": 1.3158, + "step": 1462 + }, + { + "epoch": 0.31, + "learning_rate": 3.2442381846659644e-05, + "loss": 1.3028, + "step": 1463 + }, + { + "epoch": 0.31, + "learning_rate": 3.243171045399069e-05, + "loss": 1.3344, + "step": 1464 + }, + { + "epoch": 0.31, + "learning_rate": 3.2421033290463966e-05, + "loss": 1.262, + "step": 1465 + }, + { + "epoch": 0.31, + "learning_rate": 3.241035036103587e-05, + "loss": 1.2577, + "step": 1466 + }, + { + "epoch": 0.31, + "learning_rate": 3.239966167066545e-05, + "loss": 1.2835, + "step": 1467 + }, + { + "epoch": 0.31, + "learning_rate": 3.2388967224314464e-05, + "loss": 1.2941, + "step": 1468 + }, + { + "epoch": 0.31, + "learning_rate": 3.2378267026947314e-05, + "loss": 1.2777, + "step": 1469 + }, + { + "epoch": 0.31, + "learning_rate": 3.2367561083531074e-05, + "loss": 1.2969, + "step": 1470 + }, + { + "epoch": 0.31, + "learning_rate": 3.235684939903551e-05, + "loss": 1.3229, + "step": 1471 + }, + { + "epoch": 0.31, + "learning_rate": 3.234613197843302e-05, + "loss": 1.31, + "step": 1472 + }, + { + "epoch": 0.31, + "learning_rate": 3.233540882669869e-05, + "loss": 1.2708, + "step": 1473 + }, + { + "epoch": 0.31, + "learning_rate": 3.232467994881026e-05, + "loss": 1.2625, + "step": 1474 + }, + { + "epoch": 0.31, + "learning_rate": 3.2313945349748116e-05, + "loss": 1.3055, + "step": 1475 + }, + { + "epoch": 0.31, + "learning_rate": 3.230320503449531e-05, + "loss": 1.2882, + "step": 1476 + }, + { + "epoch": 0.31, + "learning_rate": 3.2292459008037554e-05, + "loss": 1.3261, + "step": 1477 + }, + { + "epoch": 0.31, + "learning_rate": 3.228170727536319e-05, + "loss": 1.3127, + "step": 1478 + }, + { + "epoch": 0.31, + "learning_rate": 3.227094984146323e-05, + "loss": 1.2909, + "step": 1479 + }, + { + "epoch": 0.31, + "learning_rate": 3.226018671133134e-05, + "loss": 1.2853, + "step": 1480 + }, + { + "epoch": 0.31, + "learning_rate": 3.224941788996378e-05, + "loss": 1.2684, + "step": 1481 + }, + { + "epoch": 0.31, + "learning_rate": 3.223864338235951e-05, + "loss": 1.3026, + "step": 1482 + }, + { + "epoch": 0.31, + "learning_rate": 3.2227863193520115e-05, + "loss": 1.3298, + "step": 1483 + }, + { + "epoch": 0.31, + "learning_rate": 3.221707732844979e-05, + "loss": 1.2929, + "step": 1484 + }, + { + "epoch": 0.31, + "learning_rate": 3.220628579215539e-05, + "loss": 1.3084, + "step": 1485 + }, + { + "epoch": 0.31, + "learning_rate": 3.21954885896464e-05, + "loss": 1.3481, + "step": 1486 + }, + { + "epoch": 0.31, + "learning_rate": 3.2184685725934926e-05, + "loss": 1.286, + "step": 1487 + }, + { + "epoch": 0.31, + "learning_rate": 3.2173877206035714e-05, + "loss": 1.3556, + "step": 1488 + }, + { + "epoch": 0.31, + "learning_rate": 3.2163063034966126e-05, + "loss": 1.2548, + "step": 1489 + }, + { + "epoch": 0.31, + "learning_rate": 3.215224321774614e-05, + "loss": 1.2961, + "step": 1490 + }, + { + "epoch": 0.31, + "learning_rate": 3.214141775939839e-05, + "loss": 1.2892, + "step": 1491 + }, + { + "epoch": 0.31, + "learning_rate": 3.213058666494807e-05, + "loss": 1.2803, + "step": 1492 + }, + { + "epoch": 0.31, + "learning_rate": 3.2119749939423063e-05, + "loss": 1.3108, + "step": 1493 + }, + { + "epoch": 0.31, + "learning_rate": 3.2108907587853794e-05, + "loss": 1.3067, + "step": 1494 + }, + { + "epoch": 0.31, + "learning_rate": 3.2098059615273354e-05, + "loss": 1.2733, + "step": 1495 + }, + { + "epoch": 0.31, + "learning_rate": 3.2087206026717415e-05, + "loss": 1.2772, + "step": 1496 + }, + { + "epoch": 0.31, + "learning_rate": 3.207634682722427e-05, + "loss": 1.2983, + "step": 1497 + }, + { + "epoch": 0.32, + "learning_rate": 3.206548202183479e-05, + "loss": 1.297, + "step": 1498 + }, + { + "epoch": 0.32, + "learning_rate": 3.20546116155925e-05, + "loss": 1.2824, + "step": 1499 + }, + { + "epoch": 0.32, + "learning_rate": 3.204373561354345e-05, + "loss": 1.2834, + "step": 1500 + }, + { + "epoch": 0.32, + "learning_rate": 3.203285402073637e-05, + "loss": 1.3461, + "step": 1501 + }, + { + "epoch": 0.32, + "learning_rate": 3.202196684222253e-05, + "loss": 1.2753, + "step": 1502 + }, + { + "epoch": 0.32, + "learning_rate": 3.2011074083055814e-05, + "loss": 1.283, + "step": 1503 + }, + { + "epoch": 0.32, + "learning_rate": 3.200017574829268e-05, + "loss": 1.3074, + "step": 1504 + }, + { + "epoch": 0.32, + "learning_rate": 3.19892718429922e-05, + "loss": 1.2996, + "step": 1505 + }, + { + "epoch": 0.32, + "learning_rate": 3.1978362372216006e-05, + "loss": 1.3166, + "step": 1506 + }, + { + "epoch": 0.32, + "learning_rate": 3.196744734102833e-05, + "loss": 1.3329, + "step": 1507 + }, + { + "epoch": 0.32, + "learning_rate": 3.195652675449599e-05, + "loss": 1.2842, + "step": 1508 + }, + { + "epoch": 0.32, + "learning_rate": 3.194560061768835e-05, + "loss": 1.314, + "step": 1509 + }, + { + "epoch": 0.32, + "learning_rate": 3.193466893567739e-05, + "loss": 1.2629, + "step": 1510 + }, + { + "epoch": 0.32, + "learning_rate": 3.192373171353765e-05, + "loss": 1.3123, + "step": 1511 + }, + { + "epoch": 0.32, + "learning_rate": 3.1912788956346226e-05, + "loss": 1.2651, + "step": 1512 + }, + { + "epoch": 0.32, + "learning_rate": 3.190184066918281e-05, + "loss": 1.2869, + "step": 1513 + }, + { + "epoch": 0.32, + "learning_rate": 3.189088685712964e-05, + "loss": 1.3235, + "step": 1514 + }, + { + "epoch": 0.32, + "learning_rate": 3.187992752527153e-05, + "loss": 1.2989, + "step": 1515 + }, + { + "epoch": 0.32, + "learning_rate": 3.186896267869585e-05, + "loss": 1.3157, + "step": 1516 + }, + { + "epoch": 0.32, + "learning_rate": 3.1857992322492525e-05, + "loss": 1.308, + "step": 1517 + }, + { + "epoch": 0.32, + "learning_rate": 3.184701646175407e-05, + "loss": 1.2973, + "step": 1518 + }, + { + "epoch": 0.32, + "learning_rate": 3.183603510157551e-05, + "loss": 1.3628, + "step": 1519 + }, + { + "epoch": 0.32, + "learning_rate": 3.1825048247054444e-05, + "loss": 1.3117, + "step": 1520 + }, + { + "epoch": 0.32, + "learning_rate": 3.181405590329102e-05, + "loss": 1.2541, + "step": 1521 + }, + { + "epoch": 0.32, + "learning_rate": 3.180305807538796e-05, + "loss": 1.3173, + "step": 1522 + }, + { + "epoch": 0.32, + "learning_rate": 3.1792054768450466e-05, + "loss": 1.267, + "step": 1523 + }, + { + "epoch": 0.32, + "learning_rate": 3.178104598758636e-05, + "loss": 1.3344, + "step": 1524 + }, + { + "epoch": 0.32, + "learning_rate": 3.1770031737905946e-05, + "loss": 1.3157, + "step": 1525 + }, + { + "epoch": 0.32, + "learning_rate": 3.1759012024522103e-05, + "loss": 1.2994, + "step": 1526 + }, + { + "epoch": 0.32, + "learning_rate": 3.1747986852550225e-05, + "loss": 1.3523, + "step": 1527 + }, + { + "epoch": 0.32, + "learning_rate": 3.173695622710826e-05, + "loss": 1.3186, + "step": 1528 + }, + { + "epoch": 0.32, + "learning_rate": 3.172592015331666e-05, + "loss": 1.2854, + "step": 1529 + }, + { + "epoch": 0.32, + "learning_rate": 3.171487863629843e-05, + "loss": 1.2563, + "step": 1530 + }, + { + "epoch": 0.32, + "learning_rate": 3.1703831681179096e-05, + "loss": 1.3092, + "step": 1531 + }, + { + "epoch": 0.32, + "learning_rate": 3.169277929308669e-05, + "loss": 1.3017, + "step": 1532 + }, + { + "epoch": 0.32, + "learning_rate": 3.168172147715181e-05, + "loss": 1.299, + "step": 1533 + }, + { + "epoch": 0.32, + "learning_rate": 3.1670658238507524e-05, + "loss": 1.3178, + "step": 1534 + }, + { + "epoch": 0.32, + "learning_rate": 3.1659589582289446e-05, + "loss": 1.2963, + "step": 1535 + }, + { + "epoch": 0.32, + "learning_rate": 3.16485155136357e-05, + "loss": 1.3274, + "step": 1536 + }, + { + "epoch": 0.32, + "learning_rate": 3.16374360376869e-05, + "loss": 1.2938, + "step": 1537 + }, + { + "epoch": 0.32, + "learning_rate": 3.1626351159586224e-05, + "loss": 1.2672, + "step": 1538 + }, + { + "epoch": 0.32, + "learning_rate": 3.1615260884479304e-05, + "loss": 1.3287, + "step": 1539 + }, + { + "epoch": 0.32, + "learning_rate": 3.1604165217514296e-05, + "loss": 1.3223, + "step": 1540 + }, + { + "epoch": 0.32, + "learning_rate": 3.159306416384187e-05, + "loss": 1.2831, + "step": 1541 + }, + { + "epoch": 0.32, + "learning_rate": 3.158195772861517e-05, + "loss": 1.3246, + "step": 1542 + }, + { + "epoch": 0.32, + "learning_rate": 3.1570845916989875e-05, + "loss": 1.3066, + "step": 1543 + }, + { + "epoch": 0.32, + "learning_rate": 3.1559728734124125e-05, + "loss": 1.3554, + "step": 1544 + }, + { + "epoch": 0.32, + "learning_rate": 3.154860618517858e-05, + "loss": 1.2883, + "step": 1545 + }, + { + "epoch": 0.33, + "learning_rate": 3.1537478275316364e-05, + "loss": 1.2856, + "step": 1546 + }, + { + "epoch": 0.33, + "learning_rate": 3.152634500970312e-05, + "loss": 1.3114, + "step": 1547 + }, + { + "epoch": 0.33, + "learning_rate": 3.151520639350695e-05, + "loss": 1.3208, + "step": 1548 + }, + { + "epoch": 0.33, + "learning_rate": 3.150406243189846e-05, + "loss": 1.3286, + "step": 1549 + }, + { + "epoch": 0.33, + "learning_rate": 3.1492913130050715e-05, + "loss": 1.3066, + "step": 1550 + }, + { + "epoch": 0.33, + "learning_rate": 3.1481758493139295e-05, + "loss": 1.3445, + "step": 1551 + }, + { + "epoch": 0.33, + "learning_rate": 3.147059852634221e-05, + "loss": 1.3027, + "step": 1552 + }, + { + "epoch": 0.33, + "learning_rate": 3.145943323483999e-05, + "loss": 1.3306, + "step": 1553 + }, + { + "epoch": 0.33, + "learning_rate": 3.144826262381559e-05, + "loss": 1.3253, + "step": 1554 + }, + { + "epoch": 0.33, + "learning_rate": 3.143708669845449e-05, + "loss": 1.2997, + "step": 1555 + }, + { + "epoch": 0.33, + "learning_rate": 3.142590546394459e-05, + "loss": 1.2951, + "step": 1556 + }, + { + "epoch": 0.33, + "learning_rate": 3.141471892547627e-05, + "loss": 1.3165, + "step": 1557 + }, + { + "epoch": 0.33, + "learning_rate": 3.140352708824237e-05, + "loss": 1.2899, + "step": 1558 + }, + { + "epoch": 0.33, + "learning_rate": 3.1392329957438203e-05, + "loss": 1.3088, + "step": 1559 + }, + { + "epoch": 0.33, + "learning_rate": 3.1381127538261524e-05, + "loss": 1.298, + "step": 1560 + }, + { + "epoch": 0.33, + "learning_rate": 3.136991983591255e-05, + "loss": 1.2857, + "step": 1561 + }, + { + "epoch": 0.33, + "learning_rate": 3.1358706855593935e-05, + "loss": 1.2902, + "step": 1562 + }, + { + "epoch": 0.33, + "learning_rate": 3.1347488602510824e-05, + "loss": 1.3583, + "step": 1563 + }, + { + "epoch": 0.33, + "learning_rate": 3.133626508187076e-05, + "loss": 1.3131, + "step": 1564 + }, + { + "epoch": 0.33, + "learning_rate": 3.132503629888376e-05, + "loss": 1.2533, + "step": 1565 + }, + { + "epoch": 0.33, + "learning_rate": 3.131380225876228e-05, + "loss": 1.2924, + "step": 1566 + }, + { + "epoch": 0.33, + "learning_rate": 3.130256296672121e-05, + "loss": 1.3152, + "step": 1567 + }, + { + "epoch": 0.33, + "learning_rate": 3.1291318427977876e-05, + "loss": 1.2727, + "step": 1568 + }, + { + "epoch": 0.33, + "learning_rate": 3.1280068647752066e-05, + "loss": 1.3052, + "step": 1569 + }, + { + "epoch": 0.33, + "learning_rate": 3.126881363126595e-05, + "loss": 1.3038, + "step": 1570 + }, + { + "epoch": 0.33, + "learning_rate": 3.1257553383744186e-05, + "loss": 1.3172, + "step": 1571 + }, + { + "epoch": 0.33, + "learning_rate": 3.1246287910413824e-05, + "loss": 1.2669, + "step": 1572 + }, + { + "epoch": 0.33, + "learning_rate": 3.123501721650434e-05, + "loss": 1.2939, + "step": 1573 + }, + { + "epoch": 0.33, + "learning_rate": 3.122374130724765e-05, + "loss": 1.302, + "step": 1574 + }, + { + "epoch": 0.33, + "learning_rate": 3.1212460187878085e-05, + "loss": 1.2917, + "step": 1575 + }, + { + "epoch": 0.33, + "learning_rate": 3.1201173863632396e-05, + "loss": 1.3215, + "step": 1576 + }, + { + "epoch": 0.33, + "learning_rate": 3.1189882339749735e-05, + "loss": 1.281, + "step": 1577 + }, + { + "epoch": 0.33, + "learning_rate": 3.117858562147169e-05, + "loss": 1.3307, + "step": 1578 + }, + { + "epoch": 0.33, + "learning_rate": 3.116728371404225e-05, + "loss": 1.2713, + "step": 1579 + }, + { + "epoch": 0.33, + "learning_rate": 3.115597662270781e-05, + "loss": 1.2681, + "step": 1580 + }, + { + "epoch": 0.33, + "learning_rate": 3.114466435271717e-05, + "loss": 1.2676, + "step": 1581 + }, + { + "epoch": 0.33, + "learning_rate": 3.113334690932155e-05, + "loss": 1.2849, + "step": 1582 + }, + { + "epoch": 0.33, + "learning_rate": 3.1122024297774545e-05, + "loss": 1.3106, + "step": 1583 + }, + { + "epoch": 0.33, + "learning_rate": 3.111069652333219e-05, + "loss": 1.3076, + "step": 1584 + }, + { + "epoch": 0.33, + "learning_rate": 3.1099363591252844e-05, + "loss": 1.2546, + "step": 1585 + }, + { + "epoch": 0.33, + "learning_rate": 3.1088025506797356e-05, + "loss": 1.3116, + "step": 1586 + }, + { + "epoch": 0.33, + "learning_rate": 3.107668227522889e-05, + "loss": 1.3047, + "step": 1587 + }, + { + "epoch": 0.33, + "learning_rate": 3.106533390181304e-05, + "loss": 1.3274, + "step": 1588 + }, + { + "epoch": 0.33, + "learning_rate": 3.105398039181775e-05, + "loss": 1.2934, + "step": 1589 + }, + { + "epoch": 0.33, + "learning_rate": 3.1042621750513405e-05, + "loss": 1.2906, + "step": 1590 + }, + { + "epoch": 0.33, + "learning_rate": 3.103125798317272e-05, + "loss": 1.26, + "step": 1591 + }, + { + "epoch": 0.33, + "learning_rate": 3.101988909507081e-05, + "loss": 1.2634, + "step": 1592 + }, + { + "epoch": 0.34, + "learning_rate": 3.100851509148517e-05, + "loss": 1.2759, + "step": 1593 + }, + { + "epoch": 0.34, + "learning_rate": 3.099713597769566e-05, + "loss": 1.3192, + "step": 1594 + }, + { + "epoch": 0.34, + "learning_rate": 3.098575175898452e-05, + "loss": 1.2767, + "step": 1595 + }, + { + "epoch": 0.34, + "learning_rate": 3.097436244063636e-05, + "loss": 1.2558, + "step": 1596 + }, + { + "epoch": 0.34, + "learning_rate": 3.0962968027938156e-05, + "loss": 1.3523, + "step": 1597 + }, + { + "epoch": 0.34, + "learning_rate": 3.0951568526179235e-05, + "loss": 1.3043, + "step": 1598 + }, + { + "epoch": 0.34, + "learning_rate": 3.094016394065131e-05, + "loss": 1.3225, + "step": 1599 + }, + { + "epoch": 0.34, + "learning_rate": 3.0928754276648443e-05, + "loss": 1.3094, + "step": 1600 + }, + { + "epoch": 0.34, + "learning_rate": 3.091733953946705e-05, + "loss": 1.2396, + "step": 1601 + }, + { + "epoch": 0.34, + "learning_rate": 3.09059197344059e-05, + "loss": 1.2891, + "step": 1602 + }, + { + "epoch": 0.34, + "learning_rate": 3.089449486676613e-05, + "loss": 1.3109, + "step": 1603 + }, + { + "epoch": 0.34, + "learning_rate": 3.08830649418512e-05, + "loss": 1.3257, + "step": 1604 + }, + { + "epoch": 0.34, + "learning_rate": 3.087162996496696e-05, + "loss": 1.3005, + "step": 1605 + }, + { + "epoch": 0.34, + "learning_rate": 3.086018994142156e-05, + "loss": 1.3104, + "step": 1606 + }, + { + "epoch": 0.34, + "learning_rate": 3.084874487652551e-05, + "loss": 1.3131, + "step": 1607 + }, + { + "epoch": 0.34, + "learning_rate": 3.0837294775591675e-05, + "loss": 1.3353, + "step": 1608 + }, + { + "epoch": 0.34, + "learning_rate": 3.082583964393524e-05, + "loss": 1.3006, + "step": 1609 + }, + { + "epoch": 0.34, + "learning_rate": 3.081437948687373e-05, + "loss": 1.3282, + "step": 1610 + }, + { + "epoch": 0.34, + "learning_rate": 3.0802914309727004e-05, + "loss": 1.3562, + "step": 1611 + }, + { + "epoch": 0.34, + "learning_rate": 3.0791444117817247e-05, + "loss": 1.2589, + "step": 1612 + }, + { + "epoch": 0.34, + "learning_rate": 3.0779968916468974e-05, + "loss": 1.3137, + "step": 1613 + }, + { + "epoch": 0.34, + "learning_rate": 3.076848871100904e-05, + "loss": 1.3063, + "step": 1614 + }, + { + "epoch": 0.34, + "learning_rate": 3.075700350676659e-05, + "loss": 1.3101, + "step": 1615 + }, + { + "epoch": 0.34, + "learning_rate": 3.074551330907312e-05, + "loss": 1.3169, + "step": 1616 + }, + { + "epoch": 0.34, + "learning_rate": 3.073401812326244e-05, + "loss": 1.2861, + "step": 1617 + }, + { + "epoch": 0.34, + "learning_rate": 3.072251795467065e-05, + "loss": 1.306, + "step": 1618 + }, + { + "epoch": 0.34, + "learning_rate": 3.071101280863621e-05, + "loss": 1.2439, + "step": 1619 + }, + { + "epoch": 0.34, + "learning_rate": 3.069950269049983e-05, + "loss": 1.289, + "step": 1620 + }, + { + "epoch": 0.34, + "learning_rate": 3.068798760560458e-05, + "loss": 1.3182, + "step": 1621 + }, + { + "epoch": 0.34, + "learning_rate": 3.067646755929582e-05, + "loss": 1.2638, + "step": 1622 + }, + { + "epoch": 0.34, + "learning_rate": 3.066494255692119e-05, + "loss": 1.2697, + "step": 1623 + }, + { + "epoch": 0.34, + "learning_rate": 3.0653412603830665e-05, + "loss": 1.3101, + "step": 1624 + }, + { + "epoch": 0.34, + "learning_rate": 3.06418777053765e-05, + "loss": 1.3043, + "step": 1625 + }, + { + "epoch": 0.34, + "learning_rate": 3.0630337866913236e-05, + "loss": 1.2993, + "step": 1626 + }, + { + "epoch": 0.34, + "learning_rate": 3.061879309379774e-05, + "loss": 1.2612, + "step": 1627 + }, + { + "epoch": 0.34, + "learning_rate": 3.060724339138913e-05, + "loss": 1.3094, + "step": 1628 + }, + { + "epoch": 0.34, + "learning_rate": 3.0595688765048855e-05, + "loss": 1.2825, + "step": 1629 + }, + { + "epoch": 0.34, + "learning_rate": 3.058412922014061e-05, + "loss": 1.353, + "step": 1630 + }, + { + "epoch": 0.34, + "learning_rate": 3.057256476203038e-05, + "loss": 1.2747, + "step": 1631 + }, + { + "epoch": 0.34, + "learning_rate": 3.056099539608646e-05, + "loss": 1.3117, + "step": 1632 + }, + { + "epoch": 0.34, + "learning_rate": 3.0549421127679395e-05, + "loss": 1.3436, + "step": 1633 + }, + { + "epoch": 0.34, + "learning_rate": 3.053784196218201e-05, + "loss": 1.3072, + "step": 1634 + }, + { + "epoch": 0.34, + "learning_rate": 3.052625790496942e-05, + "loss": 1.3141, + "step": 1635 + }, + { + "epoch": 0.34, + "learning_rate": 3.0514668961418984e-05, + "loss": 1.2224, + "step": 1636 + }, + { + "epoch": 0.34, + "learning_rate": 3.050307513691035e-05, + "loss": 1.2675, + "step": 1637 + }, + { + "epoch": 0.34, + "learning_rate": 3.0491476436825427e-05, + "loss": 1.3372, + "step": 1638 + }, + { + "epoch": 0.34, + "learning_rate": 3.047987286654838e-05, + "loss": 1.2686, + "step": 1639 + }, + { + "epoch": 0.34, + "learning_rate": 3.0468264431465643e-05, + "loss": 1.3408, + "step": 1640 + }, + { + "epoch": 0.35, + "learning_rate": 3.045665113696591e-05, + "loss": 1.3472, + "step": 1641 + }, + { + "epoch": 0.35, + "learning_rate": 3.0445032988440126e-05, + "loss": 1.301, + "step": 1642 + }, + { + "epoch": 0.35, + "learning_rate": 3.0433409991281483e-05, + "loss": 1.2907, + "step": 1643 + }, + { + "epoch": 0.35, + "learning_rate": 3.042178215088543e-05, + "loss": 1.2662, + "step": 1644 + }, + { + "epoch": 0.35, + "learning_rate": 3.041014947264967e-05, + "loss": 1.239, + "step": 1645 + }, + { + "epoch": 0.35, + "learning_rate": 3.0398511961974143e-05, + "loss": 1.2055, + "step": 1646 + }, + { + "epoch": 0.35, + "learning_rate": 3.0386869624261036e-05, + "loss": 1.2929, + "step": 1647 + }, + { + "epoch": 0.35, + "learning_rate": 3.0375222464914782e-05, + "loss": 1.2832, + "step": 1648 + }, + { + "epoch": 0.35, + "learning_rate": 3.0363570489342033e-05, + "loss": 1.2852, + "step": 1649 + }, + { + "epoch": 0.35, + "learning_rate": 3.03519137029517e-05, + "loss": 1.2865, + "step": 1650 + }, + { + "epoch": 0.35, + "learning_rate": 3.034025211115492e-05, + "loss": 1.2482, + "step": 1651 + }, + { + "epoch": 0.35, + "learning_rate": 3.0328585719365057e-05, + "loss": 1.2963, + "step": 1652 + }, + { + "epoch": 0.35, + "learning_rate": 3.0316914532997694e-05, + "loss": 1.2997, + "step": 1653 + }, + { + "epoch": 0.35, + "learning_rate": 3.030523855747066e-05, + "loss": 1.3042, + "step": 1654 + }, + { + "epoch": 0.35, + "learning_rate": 3.0293557798203998e-05, + "loss": 1.3074, + "step": 1655 + }, + { + "epoch": 0.35, + "learning_rate": 3.0281872260619965e-05, + "loss": 1.3307, + "step": 1656 + }, + { + "epoch": 0.35, + "learning_rate": 3.0270181950143045e-05, + "loss": 1.2965, + "step": 1657 + }, + { + "epoch": 0.35, + "learning_rate": 3.025848687219993e-05, + "loss": 1.3051, + "step": 1658 + }, + { + "epoch": 0.35, + "learning_rate": 3.0246787032219535e-05, + "loss": 1.344, + "step": 1659 + }, + { + "epoch": 0.35, + "learning_rate": 3.0235082435632984e-05, + "loss": 1.3226, + "step": 1660 + }, + { + "epoch": 0.35, + "learning_rate": 3.022337308787359e-05, + "loss": 1.2981, + "step": 1661 + }, + { + "epoch": 0.35, + "learning_rate": 3.02116589943769e-05, + "loss": 1.2949, + "step": 1662 + }, + { + "epoch": 0.35, + "learning_rate": 3.019994016058064e-05, + "loss": 1.2734, + "step": 1663 + }, + { + "epoch": 0.35, + "learning_rate": 3.018821659192476e-05, + "loss": 1.2922, + "step": 1664 + }, + { + "epoch": 0.35, + "learning_rate": 3.0176488293851388e-05, + "loss": 1.2911, + "step": 1665 + }, + { + "epoch": 0.35, + "learning_rate": 3.0164755271804856e-05, + "loss": 1.2955, + "step": 1666 + }, + { + "epoch": 0.35, + "learning_rate": 3.015301753123169e-05, + "loss": 1.327, + "step": 1667 + }, + { + "epoch": 0.35, + "learning_rate": 3.0141275077580592e-05, + "loss": 1.2304, + "step": 1668 + }, + { + "epoch": 0.35, + "learning_rate": 3.0129527916302482e-05, + "loss": 1.3248, + "step": 1669 + }, + { + "epoch": 0.35, + "learning_rate": 3.0117776052850427e-05, + "loss": 1.2967, + "step": 1670 + }, + { + "epoch": 0.35, + "learning_rate": 3.0106019492679714e-05, + "loss": 1.2503, + "step": 1671 + }, + { + "epoch": 0.35, + "learning_rate": 3.009425824124778e-05, + "loss": 1.2552, + "step": 1672 + }, + { + "epoch": 0.35, + "learning_rate": 3.008249230401426e-05, + "loss": 1.2909, + "step": 1673 + }, + { + "epoch": 0.35, + "learning_rate": 3.0070721686440953e-05, + "loss": 1.3333, + "step": 1674 + }, + { + "epoch": 0.35, + "learning_rate": 3.0058946393991833e-05, + "loss": 1.2823, + "step": 1675 + }, + { + "epoch": 0.35, + "learning_rate": 3.004716643213305e-05, + "loss": 1.2726, + "step": 1676 + }, + { + "epoch": 0.35, + "learning_rate": 3.003538180633292e-05, + "loss": 1.3186, + "step": 1677 + }, + { + "epoch": 0.35, + "learning_rate": 3.0023592522061916e-05, + "loss": 1.2841, + "step": 1678 + }, + { + "epoch": 0.35, + "learning_rate": 3.0011798584792672e-05, + "loss": 1.3068, + "step": 1679 + }, + { + "epoch": 0.35, + "learning_rate": 3.0000000000000004e-05, + "loss": 1.2998, + "step": 1680 + }, + { + "epoch": 0.35, + "learning_rate": 2.9988196773160857e-05, + "loss": 1.3281, + "step": 1681 + }, + { + "epoch": 0.35, + "learning_rate": 2.9976388909754348e-05, + "loss": 1.2653, + "step": 1682 + }, + { + "epoch": 0.35, + "learning_rate": 2.996457641526174e-05, + "loss": 1.2684, + "step": 1683 + }, + { + "epoch": 0.35, + "learning_rate": 2.995275929516646e-05, + "loss": 1.3073, + "step": 1684 + }, + { + "epoch": 0.35, + "learning_rate": 2.9940937554954053e-05, + "loss": 1.287, + "step": 1685 + }, + { + "epoch": 0.35, + "learning_rate": 2.9929111200112233e-05, + "loss": 1.2919, + "step": 1686 + }, + { + "epoch": 0.35, + "learning_rate": 2.991728023613085e-05, + "loss": 1.2492, + "step": 1687 + }, + { + "epoch": 0.36, + "learning_rate": 2.990544466850189e-05, + "loss": 1.2469, + "step": 1688 + }, + { + "epoch": 0.36, + "learning_rate": 2.9893604502719474e-05, + "loss": 1.3062, + "step": 1689 + }, + { + "epoch": 0.36, + "learning_rate": 2.9881759744279875e-05, + "loss": 1.326, + "step": 1690 + }, + { + "epoch": 0.36, + "learning_rate": 2.986991039868148e-05, + "loss": 1.2793, + "step": 1691 + }, + { + "epoch": 0.36, + "learning_rate": 2.9858056471424804e-05, + "loss": 1.3325, + "step": 1692 + }, + { + "epoch": 0.36, + "learning_rate": 2.98461979680125e-05, + "loss": 1.306, + "step": 1693 + }, + { + "epoch": 0.36, + "learning_rate": 2.983433489394934e-05, + "loss": 1.334, + "step": 1694 + }, + { + "epoch": 0.36, + "learning_rate": 2.9822467254742212e-05, + "loss": 1.3122, + "step": 1695 + }, + { + "epoch": 0.36, + "learning_rate": 2.9810595055900148e-05, + "loss": 1.2584, + "step": 1696 + }, + { + "epoch": 0.36, + "learning_rate": 2.9798718302934255e-05, + "loss": 1.3124, + "step": 1697 + }, + { + "epoch": 0.36, + "learning_rate": 2.9786837001357782e-05, + "loss": 1.2975, + "step": 1698 + }, + { + "epoch": 0.36, + "learning_rate": 2.9774951156686094e-05, + "loss": 1.3272, + "step": 1699 + }, + { + "epoch": 0.36, + "learning_rate": 2.976306077443665e-05, + "loss": 1.2671, + "step": 1700 + }, + { + "epoch": 0.36, + "learning_rate": 2.9751165860129024e-05, + "loss": 1.305, + "step": 1701 + }, + { + "epoch": 0.36, + "learning_rate": 2.973926641928489e-05, + "loss": 1.3157, + "step": 1702 + }, + { + "epoch": 0.36, + "learning_rate": 2.9727362457428012e-05, + "loss": 1.289, + "step": 1703 + }, + { + "epoch": 0.36, + "learning_rate": 2.971545398008428e-05, + "loss": 1.265, + "step": 1704 + }, + { + "epoch": 0.36, + "learning_rate": 2.970354099278166e-05, + "loss": 1.2552, + "step": 1705 + }, + { + "epoch": 0.36, + "learning_rate": 2.9691623501050212e-05, + "loss": 1.2669, + "step": 1706 + }, + { + "epoch": 0.36, + "learning_rate": 2.967970151042209e-05, + "loss": 1.3045, + "step": 1707 + }, + { + "epoch": 0.36, + "learning_rate": 2.9667775026431544e-05, + "loss": 1.323, + "step": 1708 + }, + { + "epoch": 0.36, + "learning_rate": 2.96558440546149e-05, + "loss": 1.2951, + "step": 1709 + }, + { + "epoch": 0.36, + "learning_rate": 2.9643908600510572e-05, + "loss": 1.3034, + "step": 1710 + }, + { + "epoch": 0.36, + "learning_rate": 2.9631968669659047e-05, + "loss": 1.3261, + "step": 1711 + }, + { + "epoch": 0.36, + "learning_rate": 2.9620024267602906e-05, + "loss": 1.2854, + "step": 1712 + }, + { + "epoch": 0.36, + "learning_rate": 2.9608075399886792e-05, + "loss": 1.3118, + "step": 1713 + }, + { + "epoch": 0.36, + "learning_rate": 2.9596122072057424e-05, + "loss": 1.2566, + "step": 1714 + }, + { + "epoch": 0.36, + "learning_rate": 2.958416428966359e-05, + "loss": 1.2837, + "step": 1715 + }, + { + "epoch": 0.36, + "learning_rate": 2.957220205825615e-05, + "loss": 1.2366, + "step": 1716 + }, + { + "epoch": 0.36, + "learning_rate": 2.956023538338803e-05, + "loss": 1.3016, + "step": 1717 + }, + { + "epoch": 0.36, + "learning_rate": 2.9548264270614217e-05, + "loss": 1.265, + "step": 1718 + }, + { + "epoch": 0.36, + "learning_rate": 2.953628872549175e-05, + "loss": 1.2633, + "step": 1719 + }, + { + "epoch": 0.36, + "learning_rate": 2.952430875357974e-05, + "loss": 1.2747, + "step": 1720 + }, + { + "epoch": 0.36, + "learning_rate": 2.9512324360439347e-05, + "loss": 1.279, + "step": 1721 + }, + { + "epoch": 0.36, + "learning_rate": 2.9500335551633773e-05, + "loss": 1.3096, + "step": 1722 + }, + { + "epoch": 0.36, + "learning_rate": 2.9488342332728276e-05, + "loss": 1.3025, + "step": 1723 + }, + { + "epoch": 0.36, + "learning_rate": 2.9476344709290175e-05, + "loss": 1.2609, + "step": 1724 + }, + { + "epoch": 0.36, + "learning_rate": 2.9464342686888826e-05, + "loss": 1.2991, + "step": 1725 + }, + { + "epoch": 0.36, + "learning_rate": 2.9452336271095613e-05, + "loss": 1.2917, + "step": 1726 + }, + { + "epoch": 0.36, + "learning_rate": 2.9440325467483974e-05, + "loss": 1.3211, + "step": 1727 + }, + { + "epoch": 0.36, + "learning_rate": 2.942831028162938e-05, + "loss": 1.3183, + "step": 1728 + }, + { + "epoch": 0.36, + "learning_rate": 2.9416290719109333e-05, + "loss": 1.3264, + "step": 1729 + }, + { + "epoch": 0.36, + "learning_rate": 2.9404266785503376e-05, + "loss": 1.2962, + "step": 1730 + }, + { + "epoch": 0.36, + "learning_rate": 2.9392238486393068e-05, + "loss": 1.28, + "step": 1731 + }, + { + "epoch": 0.36, + "learning_rate": 2.9380205827362007e-05, + "loss": 1.2701, + "step": 1732 + }, + { + "epoch": 0.36, + "learning_rate": 2.9368168813995806e-05, + "loss": 1.2984, + "step": 1733 + }, + { + "epoch": 0.36, + "learning_rate": 2.9356127451882105e-05, + "loss": 1.2856, + "step": 1734 + }, + { + "epoch": 0.36, + "learning_rate": 2.934408174661055e-05, + "loss": 1.264, + "step": 1735 + }, + { + "epoch": 0.37, + "learning_rate": 2.933203170377283e-05, + "loss": 1.2299, + "step": 1736 + }, + { + "epoch": 0.37, + "learning_rate": 2.931997732896262e-05, + "loss": 1.2662, + "step": 1737 + }, + { + "epoch": 0.37, + "learning_rate": 2.9307918627775627e-05, + "loss": 1.2906, + "step": 1738 + }, + { + "epoch": 0.37, + "learning_rate": 2.9295855605809543e-05, + "loss": 1.2641, + "step": 1739 + }, + { + "epoch": 0.37, + "learning_rate": 2.9283788268664085e-05, + "loss": 1.3194, + "step": 1740 + }, + { + "epoch": 0.37, + "learning_rate": 2.9271716621940965e-05, + "loss": 1.3055, + "step": 1741 + }, + { + "epoch": 0.37, + "learning_rate": 2.9259640671243903e-05, + "loss": 1.3295, + "step": 1742 + }, + { + "epoch": 0.37, + "learning_rate": 2.9247560422178604e-05, + "loss": 1.3002, + "step": 1743 + }, + { + "epoch": 0.37, + "learning_rate": 2.923547588035278e-05, + "loss": 1.2909, + "step": 1744 + }, + { + "epoch": 0.37, + "learning_rate": 2.9223387051376133e-05, + "loss": 1.3057, + "step": 1745 + }, + { + "epoch": 0.37, + "learning_rate": 2.921129394086035e-05, + "loss": 1.3612, + "step": 1746 + }, + { + "epoch": 0.37, + "learning_rate": 2.919919655441911e-05, + "loss": 1.2866, + "step": 1747 + }, + { + "epoch": 0.37, + "learning_rate": 2.9187094897668076e-05, + "loss": 1.277, + "step": 1748 + }, + { + "epoch": 0.37, + "learning_rate": 2.9174988976224897e-05, + "loss": 1.241, + "step": 1749 + }, + { + "epoch": 0.37, + "learning_rate": 2.916287879570919e-05, + "loss": 1.2827, + "step": 1750 + }, + { + "epoch": 0.37, + "learning_rate": 2.9150764361742564e-05, + "loss": 1.3067, + "step": 1751 + }, + { + "epoch": 0.37, + "learning_rate": 2.9138645679948596e-05, + "loss": 1.3002, + "step": 1752 + }, + { + "epoch": 0.37, + "learning_rate": 2.912652275595283e-05, + "loss": 1.3055, + "step": 1753 + }, + { + "epoch": 0.37, + "learning_rate": 2.9114395595382786e-05, + "loss": 1.2862, + "step": 1754 + }, + { + "epoch": 0.37, + "learning_rate": 2.9102264203867948e-05, + "loss": 1.284, + "step": 1755 + }, + { + "epoch": 0.37, + "learning_rate": 2.9090128587039763e-05, + "loss": 1.3091, + "step": 1756 + }, + { + "epoch": 0.37, + "learning_rate": 2.9077988750531645e-05, + "loss": 1.2661, + "step": 1757 + }, + { + "epoch": 0.37, + "learning_rate": 2.9065844699978957e-05, + "loss": 1.3209, + "step": 1758 + }, + { + "epoch": 0.37, + "learning_rate": 2.9053696441019022e-05, + "loss": 1.2794, + "step": 1759 + }, + { + "epoch": 0.37, + "learning_rate": 2.9041543979291125e-05, + "loss": 1.2528, + "step": 1760 + }, + { + "epoch": 0.37, + "learning_rate": 2.90293873204365e-05, + "loss": 1.2654, + "step": 1761 + }, + { + "epoch": 0.37, + "learning_rate": 2.9017226470098307e-05, + "loss": 1.2904, + "step": 1762 + }, + { + "epoch": 0.37, + "learning_rate": 2.9005061433921685e-05, + "loss": 1.2665, + "step": 1763 + }, + { + "epoch": 0.37, + "learning_rate": 2.8992892217553693e-05, + "loss": 1.3105, + "step": 1764 + }, + { + "epoch": 0.37, + "learning_rate": 2.898071882664334e-05, + "loss": 1.2745, + "step": 1765 + }, + { + "epoch": 0.37, + "learning_rate": 2.896854126684157e-05, + "loss": 1.2836, + "step": 1766 + }, + { + "epoch": 0.37, + "learning_rate": 2.895635954380127e-05, + "loss": 1.2484, + "step": 1767 + }, + { + "epoch": 0.37, + "learning_rate": 2.894417366317724e-05, + "loss": 1.2963, + "step": 1768 + }, + { + "epoch": 0.37, + "learning_rate": 2.8931983630626218e-05, + "loss": 1.2534, + "step": 1769 + }, + { + "epoch": 0.37, + "learning_rate": 2.8919789451806893e-05, + "loss": 1.285, + "step": 1770 + }, + { + "epoch": 0.37, + "learning_rate": 2.890759113237985e-05, + "loss": 1.2559, + "step": 1771 + }, + { + "epoch": 0.37, + "learning_rate": 2.8895388678007602e-05, + "loss": 1.3274, + "step": 1772 + }, + { + "epoch": 0.37, + "learning_rate": 2.8883182094354594e-05, + "loss": 1.1933, + "step": 1773 + }, + { + "epoch": 0.37, + "learning_rate": 2.887097138708717e-05, + "loss": 1.266, + "step": 1774 + }, + { + "epoch": 0.37, + "learning_rate": 2.8858756561873605e-05, + "loss": 1.2943, + "step": 1775 + }, + { + "epoch": 0.37, + "learning_rate": 2.884653762438407e-05, + "loss": 1.3024, + "step": 1776 + }, + { + "epoch": 0.37, + "learning_rate": 2.8834314580290655e-05, + "loss": 1.2913, + "step": 1777 + }, + { + "epoch": 0.37, + "learning_rate": 2.882208743526736e-05, + "loss": 1.2376, + "step": 1778 + }, + { + "epoch": 0.37, + "learning_rate": 2.8809856194990067e-05, + "loss": 1.3143, + "step": 1779 + }, + { + "epoch": 0.37, + "learning_rate": 2.8797620865136594e-05, + "loss": 1.2916, + "step": 1780 + }, + { + "epoch": 0.37, + "learning_rate": 2.8785381451386628e-05, + "loss": 1.3028, + "step": 1781 + }, + { + "epoch": 0.37, + "learning_rate": 2.877313795942176e-05, + "loss": 1.2968, + "step": 1782 + }, + { + "epoch": 0.38, + "learning_rate": 2.8760890394925477e-05, + "loss": 1.2332, + "step": 1783 + }, + { + "epoch": 0.38, + "learning_rate": 2.8748638763583158e-05, + "loss": 1.3258, + "step": 1784 + }, + { + "epoch": 0.38, + "learning_rate": 2.8736383071082065e-05, + "loss": 1.3252, + "step": 1785 + }, + { + "epoch": 0.38, + "learning_rate": 2.872412332311135e-05, + "loss": 1.2971, + "step": 1786 + }, + { + "epoch": 0.38, + "learning_rate": 2.8711859525362045e-05, + "loss": 1.2805, + "step": 1787 + }, + { + "epoch": 0.38, + "learning_rate": 2.8699591683527058e-05, + "loss": 1.3081, + "step": 1788 + }, + { + "epoch": 0.38, + "learning_rate": 2.8687319803301183e-05, + "loss": 1.2288, + "step": 1789 + }, + { + "epoch": 0.38, + "learning_rate": 2.867504389038108e-05, + "loss": 1.2787, + "step": 1790 + }, + { + "epoch": 0.38, + "learning_rate": 2.8662763950465284e-05, + "loss": 1.3432, + "step": 1791 + }, + { + "epoch": 0.38, + "learning_rate": 2.8650479989254206e-05, + "loss": 1.2632, + "step": 1792 + }, + { + "epoch": 0.38, + "learning_rate": 2.8638192012450115e-05, + "loss": 1.266, + "step": 1793 + }, + { + "epoch": 0.38, + "learning_rate": 2.862590002575714e-05, + "loss": 1.2915, + "step": 1794 + }, + { + "epoch": 0.38, + "learning_rate": 2.861360403488129e-05, + "loss": 1.3058, + "step": 1795 + }, + { + "epoch": 0.38, + "learning_rate": 2.8601304045530414e-05, + "loss": 1.2763, + "step": 1796 + }, + { + "epoch": 0.38, + "learning_rate": 2.858900006341422e-05, + "loss": 1.2801, + "step": 1797 + }, + { + "epoch": 0.38, + "learning_rate": 2.8576692094244286e-05, + "loss": 1.2901, + "step": 1798 + }, + { + "epoch": 0.38, + "learning_rate": 2.856438014373402e-05, + "loss": 1.2949, + "step": 1799 + }, + { + "epoch": 0.38, + "learning_rate": 2.8552064217598697e-05, + "loss": 1.2914, + "step": 1800 + }, + { + "epoch": 0.38, + "learning_rate": 2.853974432155541e-05, + "loss": 1.3045, + "step": 1801 + }, + { + "epoch": 0.38, + "learning_rate": 2.852742046132312e-05, + "loss": 1.227, + "step": 1802 + }, + { + "epoch": 0.38, + "learning_rate": 2.8515092642622625e-05, + "loss": 1.2859, + "step": 1803 + }, + { + "epoch": 0.38, + "learning_rate": 2.8502760871176546e-05, + "loss": 1.2837, + "step": 1804 + }, + { + "epoch": 0.38, + "learning_rate": 2.8490425152709367e-05, + "loss": 1.3576, + "step": 1805 + }, + { + "epoch": 0.38, + "learning_rate": 2.847808549294736e-05, + "loss": 1.2636, + "step": 1806 + }, + { + "epoch": 0.38, + "learning_rate": 2.8465741897618673e-05, + "loss": 1.3152, + "step": 1807 + }, + { + "epoch": 0.38, + "learning_rate": 2.8453394372453253e-05, + "loss": 1.348, + "step": 1808 + }, + { + "epoch": 0.38, + "learning_rate": 2.8441042923182872e-05, + "loss": 1.2609, + "step": 1809 + }, + { + "epoch": 0.38, + "learning_rate": 2.842868755554114e-05, + "loss": 1.2705, + "step": 1810 + }, + { + "epoch": 0.38, + "learning_rate": 2.8416328275263472e-05, + "loss": 1.2899, + "step": 1811 + }, + { + "epoch": 0.38, + "learning_rate": 2.8403965088087105e-05, + "loss": 1.3033, + "step": 1812 + }, + { + "epoch": 0.38, + "learning_rate": 2.839159799975109e-05, + "loss": 1.3166, + "step": 1813 + }, + { + "epoch": 0.38, + "learning_rate": 2.8379227015996283e-05, + "loss": 1.2699, + "step": 1814 + }, + { + "epoch": 0.38, + "learning_rate": 2.8366852142565352e-05, + "loss": 1.2821, + "step": 1815 + }, + { + "epoch": 0.38, + "learning_rate": 2.8354473385202772e-05, + "loss": 1.2856, + "step": 1816 + }, + { + "epoch": 0.38, + "learning_rate": 2.834209074965482e-05, + "loss": 1.3096, + "step": 1817 + }, + { + "epoch": 0.38, + "learning_rate": 2.8329704241669574e-05, + "loss": 1.2801, + "step": 1818 + }, + { + "epoch": 0.38, + "learning_rate": 2.8317313866996897e-05, + "loss": 1.2775, + "step": 1819 + }, + { + "epoch": 0.38, + "learning_rate": 2.830491963138848e-05, + "loss": 1.2593, + "step": 1820 + }, + { + "epoch": 0.38, + "learning_rate": 2.8292521540597767e-05, + "loss": 1.2412, + "step": 1821 + }, + { + "epoch": 0.38, + "learning_rate": 2.828011960038002e-05, + "loss": 1.2641, + "step": 1822 + }, + { + "epoch": 0.38, + "learning_rate": 2.826771381649227e-05, + "loss": 1.2691, + "step": 1823 + }, + { + "epoch": 0.38, + "learning_rate": 2.8255304194693343e-05, + "loss": 1.2985, + "step": 1824 + }, + { + "epoch": 0.38, + "learning_rate": 2.8242890740743844e-05, + "loss": 1.2997, + "step": 1825 + }, + { + "epoch": 0.38, + "learning_rate": 2.8230473460406154e-05, + "loss": 1.3265, + "step": 1826 + }, + { + "epoch": 0.38, + "learning_rate": 2.8218052359444434e-05, + "loss": 1.3146, + "step": 1827 + }, + { + "epoch": 0.38, + "learning_rate": 2.8205627443624616e-05, + "loss": 1.3128, + "step": 1828 + }, + { + "epoch": 0.38, + "learning_rate": 2.8193198718714402e-05, + "loss": 1.2767, + "step": 1829 + }, + { + "epoch": 0.38, + "learning_rate": 2.8180766190483263e-05, + "loss": 1.2702, + "step": 1830 + }, + { + "epoch": 0.39, + "learning_rate": 2.8168329864702443e-05, + "loss": 1.3528, + "step": 1831 + }, + { + "epoch": 0.39, + "learning_rate": 2.8155889747144933e-05, + "loss": 1.2752, + "step": 1832 + }, + { + "epoch": 0.39, + "learning_rate": 2.8143445843585498e-05, + "loss": 1.2833, + "step": 1833 + }, + { + "epoch": 0.39, + "learning_rate": 2.8130998159800663e-05, + "loss": 1.3128, + "step": 1834 + }, + { + "epoch": 0.39, + "learning_rate": 2.8118546701568687e-05, + "loss": 1.2616, + "step": 1835 + }, + { + "epoch": 0.39, + "learning_rate": 2.81060914746696e-05, + "loss": 1.2878, + "step": 1836 + }, + { + "epoch": 0.39, + "learning_rate": 2.8093632484885182e-05, + "loss": 1.2238, + "step": 1837 + }, + { + "epoch": 0.39, + "learning_rate": 2.8081169737998956e-05, + "loss": 1.3228, + "step": 1838 + }, + { + "epoch": 0.39, + "learning_rate": 2.8068703239796175e-05, + "loss": 1.242, + "step": 1839 + }, + { + "epoch": 0.39, + "learning_rate": 2.805623299606385e-05, + "loss": 1.2731, + "step": 1840 + }, + { + "epoch": 0.39, + "learning_rate": 2.8043759012590723e-05, + "loss": 1.325, + "step": 1841 + }, + { + "epoch": 0.39, + "learning_rate": 2.803128129516729e-05, + "loss": 1.275, + "step": 1842 + }, + { + "epoch": 0.39, + "learning_rate": 2.801879984958575e-05, + "loss": 1.2973, + "step": 1843 + }, + { + "epoch": 0.39, + "learning_rate": 2.800631468164005e-05, + "loss": 1.3062, + "step": 1844 + }, + { + "epoch": 0.39, + "learning_rate": 2.7993825797125866e-05, + "loss": 1.2365, + "step": 1845 + }, + { + "epoch": 0.39, + "learning_rate": 2.7981333201840595e-05, + "loss": 1.269, + "step": 1846 + }, + { + "epoch": 0.39, + "learning_rate": 2.7968836901583364e-05, + "loss": 1.2851, + "step": 1847 + }, + { + "epoch": 0.39, + "learning_rate": 2.7956336902155003e-05, + "loss": 1.2497, + "step": 1848 + }, + { + "epoch": 0.39, + "learning_rate": 2.7943833209358076e-05, + "loss": 1.2965, + "step": 1849 + }, + { + "epoch": 0.39, + "learning_rate": 2.793132582899686e-05, + "loss": 1.317, + "step": 1850 + }, + { + "epoch": 0.39, + "learning_rate": 2.791881476687733e-05, + "loss": 1.2614, + "step": 1851 + }, + { + "epoch": 0.39, + "learning_rate": 2.790630002880718e-05, + "loss": 1.2413, + "step": 1852 + }, + { + "epoch": 0.39, + "learning_rate": 2.7893781620595818e-05, + "loss": 1.2893, + "step": 1853 + }, + { + "epoch": 0.39, + "learning_rate": 2.788125954805434e-05, + "loss": 1.2954, + "step": 1854 + }, + { + "epoch": 0.39, + "learning_rate": 2.7868733816995553e-05, + "loss": 1.3131, + "step": 1855 + }, + { + "epoch": 0.39, + "learning_rate": 2.7856204433233954e-05, + "loss": 1.2613, + "step": 1856 + }, + { + "epoch": 0.39, + "learning_rate": 2.7843671402585747e-05, + "loss": 1.2626, + "step": 1857 + }, + { + "epoch": 0.39, + "learning_rate": 2.783113473086882e-05, + "loss": 1.2773, + "step": 1858 + }, + { + "epoch": 0.39, + "learning_rate": 2.781859442390276e-05, + "loss": 1.2473, + "step": 1859 + }, + { + "epoch": 0.39, + "learning_rate": 2.780605048750883e-05, + "loss": 1.3302, + "step": 1860 + }, + { + "epoch": 0.39, + "learning_rate": 2.7793502927509988e-05, + "loss": 1.2935, + "step": 1861 + }, + { + "epoch": 0.39, + "learning_rate": 2.7780951749730864e-05, + "loss": 1.3324, + "step": 1862 + }, + { + "epoch": 0.39, + "learning_rate": 2.7768396959997783e-05, + "loss": 1.3192, + "step": 1863 + }, + { + "epoch": 0.39, + "learning_rate": 2.7755838564138722e-05, + "loss": 1.2569, + "step": 1864 + }, + { + "epoch": 0.39, + "learning_rate": 2.7743276567983354e-05, + "loss": 1.247, + "step": 1865 + }, + { + "epoch": 0.39, + "learning_rate": 2.7730710977363023e-05, + "loss": 1.3006, + "step": 1866 + }, + { + "epoch": 0.39, + "learning_rate": 2.771814179811073e-05, + "loss": 1.284, + "step": 1867 + }, + { + "epoch": 0.39, + "learning_rate": 2.7705569036061137e-05, + "loss": 1.2943, + "step": 1868 + }, + { + "epoch": 0.39, + "learning_rate": 2.7692992697050587e-05, + "loss": 1.3039, + "step": 1869 + }, + { + "epoch": 0.39, + "learning_rate": 2.7680412786917074e-05, + "loss": 1.2616, + "step": 1870 + }, + { + "epoch": 0.39, + "learning_rate": 2.7667829311500255e-05, + "loss": 1.2808, + "step": 1871 + }, + { + "epoch": 0.39, + "learning_rate": 2.765524227664143e-05, + "loss": 1.2543, + "step": 1872 + }, + { + "epoch": 0.39, + "learning_rate": 2.7642651688183558e-05, + "loss": 1.2762, + "step": 1873 + }, + { + "epoch": 0.39, + "learning_rate": 2.763005755197126e-05, + "loss": 1.2933, + "step": 1874 + }, + { + "epoch": 0.39, + "learning_rate": 2.7617459873850792e-05, + "loss": 1.3022, + "step": 1875 + }, + { + "epoch": 0.39, + "learning_rate": 2.760485865967004e-05, + "loss": 1.2794, + "step": 1876 + }, + { + "epoch": 0.39, + "learning_rate": 2.7592253915278556e-05, + "loss": 1.2269, + "step": 1877 + }, + { + "epoch": 0.4, + "learning_rate": 2.7579645646527522e-05, + "loss": 1.2871, + "step": 1878 + }, + { + "epoch": 0.4, + "learning_rate": 2.7567033859269754e-05, + "loss": 1.2604, + "step": 1879 + }, + { + "epoch": 0.4, + "learning_rate": 2.75544185593597e-05, + "loss": 1.2772, + "step": 1880 + }, + { + "epoch": 0.4, + "learning_rate": 2.754179975265344e-05, + "loss": 1.2776, + "step": 1881 + }, + { + "epoch": 0.4, + "learning_rate": 2.752917744500868e-05, + "loss": 1.2773, + "step": 1882 + }, + { + "epoch": 0.4, + "learning_rate": 2.7516551642284765e-05, + "loss": 1.2553, + "step": 1883 + }, + { + "epoch": 0.4, + "learning_rate": 2.7503922350342645e-05, + "loss": 1.2927, + "step": 1884 + }, + { + "epoch": 0.4, + "learning_rate": 2.7491289575044893e-05, + "loss": 1.2888, + "step": 1885 + }, + { + "epoch": 0.4, + "learning_rate": 2.7478653322255707e-05, + "loss": 1.2425, + "step": 1886 + }, + { + "epoch": 0.4, + "learning_rate": 2.746601359784089e-05, + "loss": 1.2898, + "step": 1887 + }, + { + "epoch": 0.4, + "learning_rate": 2.745337040766787e-05, + "loss": 1.2953, + "step": 1888 + }, + { + "epoch": 0.4, + "learning_rate": 2.744072375760566e-05, + "loss": 1.3026, + "step": 1889 + }, + { + "epoch": 0.4, + "learning_rate": 2.74280736535249e-05, + "loss": 1.3024, + "step": 1890 + }, + { + "epoch": 0.4, + "learning_rate": 2.7415420101297836e-05, + "loss": 1.2661, + "step": 1891 + }, + { + "epoch": 0.4, + "learning_rate": 2.7402763106798295e-05, + "loss": 1.3313, + "step": 1892 + }, + { + "epoch": 0.4, + "learning_rate": 2.739010267590171e-05, + "loss": 1.323, + "step": 1893 + }, + { + "epoch": 0.4, + "learning_rate": 2.7377438814485117e-05, + "loss": 1.2621, + "step": 1894 + }, + { + "epoch": 0.4, + "learning_rate": 2.7364771528427145e-05, + "loss": 1.2979, + "step": 1895 + }, + { + "epoch": 0.4, + "learning_rate": 2.7352100823608006e-05, + "loss": 1.2774, + "step": 1896 + }, + { + "epoch": 0.4, + "learning_rate": 2.733942670590949e-05, + "loss": 1.3335, + "step": 1897 + }, + { + "epoch": 0.4, + "learning_rate": 2.7326749181214992e-05, + "loss": 1.2822, + "step": 1898 + }, + { + "epoch": 0.4, + "learning_rate": 2.7314068255409466e-05, + "loss": 1.2991, + "step": 1899 + }, + { + "epoch": 0.4, + "learning_rate": 2.7301383934379475e-05, + "loss": 1.2451, + "step": 1900 + }, + { + "epoch": 0.4, + "learning_rate": 2.7288696224013124e-05, + "loss": 1.3066, + "step": 1901 + }, + { + "epoch": 0.4, + "learning_rate": 2.727600513020011e-05, + "loss": 1.2819, + "step": 1902 + }, + { + "epoch": 0.4, + "learning_rate": 2.7263310658831697e-05, + "loss": 1.2558, + "step": 1903 + }, + { + "epoch": 0.4, + "learning_rate": 2.725061281580073e-05, + "loss": 1.3023, + "step": 1904 + }, + { + "epoch": 0.4, + "learning_rate": 2.7237911607001586e-05, + "loss": 1.2866, + "step": 1905 + }, + { + "epoch": 0.4, + "learning_rate": 2.722520703833024e-05, + "loss": 1.2923, + "step": 1906 + }, + { + "epoch": 0.4, + "learning_rate": 2.7212499115684204e-05, + "loss": 1.2528, + "step": 1907 + }, + { + "epoch": 0.4, + "learning_rate": 2.719978784496257e-05, + "loss": 1.2347, + "step": 1908 + }, + { + "epoch": 0.4, + "learning_rate": 2.718707323206595e-05, + "loss": 1.2529, + "step": 1909 + }, + { + "epoch": 0.4, + "learning_rate": 2.717435528289653e-05, + "loss": 1.2897, + "step": 1910 + }, + { + "epoch": 0.4, + "learning_rate": 2.7161634003358056e-05, + "loss": 1.2463, + "step": 1911 + }, + { + "epoch": 0.4, + "learning_rate": 2.7148909399355785e-05, + "loss": 1.2654, + "step": 1912 + }, + { + "epoch": 0.4, + "learning_rate": 2.713618147679655e-05, + "loss": 1.259, + "step": 1913 + }, + { + "epoch": 0.4, + "learning_rate": 2.712345024158871e-05, + "loss": 1.2387, + "step": 1914 + }, + { + "epoch": 0.4, + "learning_rate": 2.711071569964216e-05, + "loss": 1.3236, + "step": 1915 + }, + { + "epoch": 0.4, + "learning_rate": 2.7097977856868336e-05, + "loss": 1.3116, + "step": 1916 + }, + { + "epoch": 0.4, + "learning_rate": 2.70852367191802e-05, + "loss": 1.3156, + "step": 1917 + }, + { + "epoch": 0.4, + "learning_rate": 2.707249229249225e-05, + "loss": 1.2957, + "step": 1918 + }, + { + "epoch": 0.4, + "learning_rate": 2.7059744582720515e-05, + "loss": 1.2634, + "step": 1919 + }, + { + "epoch": 0.4, + "learning_rate": 2.7046993595782532e-05, + "loss": 1.2888, + "step": 1920 + }, + { + "epoch": 0.4, + "learning_rate": 2.7034239337597378e-05, + "loss": 1.2981, + "step": 1921 + }, + { + "epoch": 0.4, + "learning_rate": 2.7021481814085622e-05, + "loss": 1.1841, + "step": 1922 + }, + { + "epoch": 0.4, + "learning_rate": 2.7008721031169378e-05, + "loss": 1.297, + "step": 1923 + }, + { + "epoch": 0.4, + "learning_rate": 2.699595699477226e-05, + "loss": 1.2748, + "step": 1924 + }, + { + "epoch": 0.4, + "learning_rate": 2.6983189710819396e-05, + "loss": 1.282, + "step": 1925 + }, + { + "epoch": 0.41, + "learning_rate": 2.697041918523741e-05, + "loss": 1.3256, + "step": 1926 + }, + { + "epoch": 0.41, + "learning_rate": 2.6957645423954438e-05, + "loss": 1.2638, + "step": 1927 + }, + { + "epoch": 0.41, + "learning_rate": 2.694486843290013e-05, + "loss": 1.2963, + "step": 1928 + }, + { + "epoch": 0.41, + "learning_rate": 2.6932088218005623e-05, + "loss": 1.277, + "step": 1929 + }, + { + "epoch": 0.41, + "learning_rate": 2.6919304785203543e-05, + "loss": 1.2888, + "step": 1930 + }, + { + "epoch": 0.41, + "learning_rate": 2.6906518140428027e-05, + "loss": 1.2757, + "step": 1931 + }, + { + "epoch": 0.41, + "learning_rate": 2.6893728289614693e-05, + "loss": 1.286, + "step": 1932 + }, + { + "epoch": 0.41, + "learning_rate": 2.688093523870065e-05, + "loss": 1.3027, + "step": 1933 + }, + { + "epoch": 0.41, + "learning_rate": 2.6868138993624486e-05, + "loss": 1.3461, + "step": 1934 + }, + { + "epoch": 0.41, + "learning_rate": 2.6855339560326284e-05, + "loss": 1.2842, + "step": 1935 + }, + { + "epoch": 0.41, + "learning_rate": 2.6842536944747597e-05, + "loss": 1.3366, + "step": 1936 + }, + { + "epoch": 0.41, + "learning_rate": 2.682973115283146e-05, + "loss": 1.2429, + "step": 1937 + }, + { + "epoch": 0.41, + "learning_rate": 2.6816922190522386e-05, + "loss": 1.2845, + "step": 1938 + }, + { + "epoch": 0.41, + "learning_rate": 2.6804110063766345e-05, + "loss": 1.3069, + "step": 1939 + }, + { + "epoch": 0.41, + "learning_rate": 2.679129477851079e-05, + "loss": 1.2499, + "step": 1940 + }, + { + "epoch": 0.41, + "learning_rate": 2.6778476340704636e-05, + "loss": 1.2346, + "step": 1941 + }, + { + "epoch": 0.41, + "learning_rate": 2.6765654756298264e-05, + "loss": 1.2508, + "step": 1942 + }, + { + "epoch": 0.41, + "learning_rate": 2.675283003124351e-05, + "loss": 1.2743, + "step": 1943 + }, + { + "epoch": 0.41, + "learning_rate": 2.6740002171493676e-05, + "loss": 1.274, + "step": 1944 + }, + { + "epoch": 0.41, + "learning_rate": 2.6727171183003502e-05, + "loss": 1.2814, + "step": 1945 + }, + { + "epoch": 0.41, + "learning_rate": 2.6714337071729207e-05, + "loss": 1.3031, + "step": 1946 + }, + { + "epoch": 0.41, + "learning_rate": 2.6701499843628443e-05, + "loss": 1.2805, + "step": 1947 + }, + { + "epoch": 0.41, + "learning_rate": 2.66886595046603e-05, + "loss": 1.2729, + "step": 1948 + }, + { + "epoch": 0.41, + "learning_rate": 2.6675816060785327e-05, + "loss": 1.312, + "step": 1949 + }, + { + "epoch": 0.41, + "learning_rate": 2.666296951796552e-05, + "loss": 1.2742, + "step": 1950 + }, + { + "epoch": 0.41, + "learning_rate": 2.6650119882164292e-05, + "loss": 1.292, + "step": 1951 + }, + { + "epoch": 0.41, + "learning_rate": 2.663726715934651e-05, + "loss": 1.2969, + "step": 1952 + }, + { + "epoch": 0.41, + "learning_rate": 2.6624411355478463e-05, + "loss": 1.2734, + "step": 1953 + }, + { + "epoch": 0.41, + "learning_rate": 2.661155247652788e-05, + "loss": 1.293, + "step": 1954 + }, + { + "epoch": 0.41, + "learning_rate": 2.6598690528463916e-05, + "loss": 1.2688, + "step": 1955 + }, + { + "epoch": 0.41, + "learning_rate": 2.6585825517257133e-05, + "loss": 1.2413, + "step": 1956 + }, + { + "epoch": 0.41, + "learning_rate": 2.6572957448879547e-05, + "loss": 1.2887, + "step": 1957 + }, + { + "epoch": 0.41, + "learning_rate": 2.656008632930456e-05, + "loss": 1.318, + "step": 1958 + }, + { + "epoch": 0.41, + "learning_rate": 2.654721216450701e-05, + "loss": 1.267, + "step": 1959 + }, + { + "epoch": 0.41, + "learning_rate": 2.653433496046315e-05, + "loss": 1.265, + "step": 1960 + }, + { + "epoch": 0.41, + "learning_rate": 2.652145472315063e-05, + "loss": 1.2791, + "step": 1961 + }, + { + "epoch": 0.41, + "learning_rate": 2.650857145854852e-05, + "loss": 1.2736, + "step": 1962 + }, + { + "epoch": 0.41, + "learning_rate": 2.6495685172637292e-05, + "loss": 1.3451, + "step": 1963 + }, + { + "epoch": 0.41, + "learning_rate": 2.6482795871398815e-05, + "loss": 1.279, + "step": 1964 + }, + { + "epoch": 0.41, + "learning_rate": 2.646990356081637e-05, + "loss": 1.2973, + "step": 1965 + }, + { + "epoch": 0.41, + "learning_rate": 2.645700824687462e-05, + "loss": 1.278, + "step": 1966 + }, + { + "epoch": 0.41, + "learning_rate": 2.644410993555963e-05, + "loss": 1.2729, + "step": 1967 + }, + { + "epoch": 0.41, + "learning_rate": 2.643120863285886e-05, + "loss": 1.3152, + "step": 1968 + }, + { + "epoch": 0.41, + "learning_rate": 2.6418304344761165e-05, + "loss": 1.3012, + "step": 1969 + }, + { + "epoch": 0.41, + "learning_rate": 2.6405397077256752e-05, + "loss": 1.2313, + "step": 1970 + }, + { + "epoch": 0.41, + "learning_rate": 2.6392486836337256e-05, + "loss": 1.2733, + "step": 1971 + }, + { + "epoch": 0.41, + "learning_rate": 2.637957362799566e-05, + "loss": 1.2687, + "step": 1972 + }, + { + "epoch": 0.41, + "learning_rate": 2.636665745822633e-05, + "loss": 1.3039, + "step": 1973 + }, + { + "epoch": 0.42, + "learning_rate": 2.6353738333025022e-05, + "loss": 1.2624, + "step": 1974 + }, + { + "epoch": 0.42, + "learning_rate": 2.6340816258388858e-05, + "loss": 1.3124, + "step": 1975 + }, + { + "epoch": 0.42, + "learning_rate": 2.6327891240316313e-05, + "loss": 1.2835, + "step": 1976 + }, + { + "epoch": 0.42, + "learning_rate": 2.6314963284807246e-05, + "loss": 1.2391, + "step": 1977 + }, + { + "epoch": 0.42, + "learning_rate": 2.630203239786287e-05, + "loss": 1.2639, + "step": 1978 + }, + { + "epoch": 0.42, + "learning_rate": 2.628909858548577e-05, + "loss": 1.285, + "step": 1979 + }, + { + "epoch": 0.42, + "learning_rate": 2.6276161853679877e-05, + "loss": 1.2698, + "step": 1980 + }, + { + "epoch": 0.42, + "learning_rate": 2.626322220845048e-05, + "loss": 1.2697, + "step": 1981 + }, + { + "epoch": 0.42, + "learning_rate": 2.6250279655804232e-05, + "loss": 1.3049, + "step": 1982 + }, + { + "epoch": 0.42, + "learning_rate": 2.6237334201749126e-05, + "loss": 1.3179, + "step": 1983 + }, + { + "epoch": 0.42, + "learning_rate": 2.6224385852294484e-05, + "loss": 1.3303, + "step": 1984 + }, + { + "epoch": 0.42, + "learning_rate": 2.6211434613451006e-05, + "loss": 1.2867, + "step": 1985 + }, + { + "epoch": 0.42, + "learning_rate": 2.6198480491230712e-05, + "loss": 1.2793, + "step": 1986 + }, + { + "epoch": 0.42, + "learning_rate": 2.618552349164697e-05, + "loss": 1.2727, + "step": 1987 + }, + { + "epoch": 0.42, + "learning_rate": 2.6172563620714475e-05, + "loss": 1.2906, + "step": 1988 + }, + { + "epoch": 0.42, + "learning_rate": 2.6159600884449258e-05, + "loss": 1.266, + "step": 1989 + }, + { + "epoch": 0.42, + "learning_rate": 2.6146635288868685e-05, + "loss": 1.2422, + "step": 1990 + }, + { + "epoch": 0.42, + "learning_rate": 2.6133666839991444e-05, + "loss": 1.2764, + "step": 1991 + }, + { + "epoch": 0.42, + "learning_rate": 2.612069554383755e-05, + "loss": 1.2452, + "step": 1992 + }, + { + "epoch": 0.42, + "learning_rate": 2.6107721406428338e-05, + "loss": 1.3092, + "step": 1993 + }, + { + "epoch": 0.42, + "learning_rate": 2.6094744433786467e-05, + "loss": 1.2806, + "step": 1994 + }, + { + "epoch": 0.42, + "learning_rate": 2.6081764631935896e-05, + "loss": 1.2924, + "step": 1995 + }, + { + "epoch": 0.42, + "learning_rate": 2.606878200690193e-05, + "loss": 1.2903, + "step": 1996 + }, + { + "epoch": 0.42, + "learning_rate": 2.605579656471115e-05, + "loss": 1.2397, + "step": 1997 + }, + { + "epoch": 0.42, + "learning_rate": 2.6042808311391456e-05, + "loss": 1.2608, + "step": 1998 + }, + { + "epoch": 0.42, + "learning_rate": 2.6029817252972064e-05, + "loss": 1.2555, + "step": 1999 + }, + { + "epoch": 0.42, + "learning_rate": 2.6016823395483482e-05, + "loss": 1.2567, + "step": 2000 + }, + { + "epoch": 0.42, + "learning_rate": 2.600382674495751e-05, + "loss": 1.3133, + "step": 2001 + }, + { + "epoch": 0.42, + "learning_rate": 2.5990827307427263e-05, + "loss": 1.3379, + "step": 2002 + }, + { + "epoch": 0.42, + "learning_rate": 2.5977825088927135e-05, + "loss": 1.1998, + "step": 2003 + }, + { + "epoch": 0.42, + "learning_rate": 2.5964820095492825e-05, + "loss": 1.276, + "step": 2004 + }, + { + "epoch": 0.42, + "learning_rate": 2.5951812333161298e-05, + "loss": 1.2713, + "step": 2005 + }, + { + "epoch": 0.42, + "learning_rate": 2.593880180797083e-05, + "loss": 1.2559, + "step": 2006 + }, + { + "epoch": 0.42, + "learning_rate": 2.5925788525960964e-05, + "loss": 1.3017, + "step": 2007 + }, + { + "epoch": 0.42, + "learning_rate": 2.5912772493172523e-05, + "loss": 1.3025, + "step": 2008 + }, + { + "epoch": 0.42, + "learning_rate": 2.5899753715647614e-05, + "loss": 1.3044, + "step": 2009 + }, + { + "epoch": 0.42, + "learning_rate": 2.5886732199429606e-05, + "loss": 1.2433, + "step": 2010 + }, + { + "epoch": 0.42, + "learning_rate": 2.587370795056315e-05, + "loss": 1.2535, + "step": 2011 + }, + { + "epoch": 0.42, + "learning_rate": 2.5860680975094178e-05, + "loss": 1.2794, + "step": 2012 + }, + { + "epoch": 0.42, + "learning_rate": 2.5847651279069847e-05, + "loss": 1.2642, + "step": 2013 + }, + { + "epoch": 0.42, + "learning_rate": 2.5834618868538623e-05, + "loss": 1.3007, + "step": 2014 + }, + { + "epoch": 0.42, + "learning_rate": 2.58215837495502e-05, + "loss": 1.2566, + "step": 2015 + }, + { + "epoch": 0.42, + "learning_rate": 2.5808545928155547e-05, + "loss": 1.3239, + "step": 2016 + }, + { + "epoch": 0.42, + "learning_rate": 2.5795505410406878e-05, + "loss": 1.2546, + "step": 2017 + }, + { + "epoch": 0.42, + "learning_rate": 2.5782462202357664e-05, + "loss": 1.2982, + "step": 2018 + }, + { + "epoch": 0.42, + "learning_rate": 2.5769416310062622e-05, + "loss": 1.3559, + "step": 2019 + }, + { + "epoch": 0.42, + "learning_rate": 2.5756367739577713e-05, + "loss": 1.259, + "step": 2020 + }, + { + "epoch": 0.43, + "learning_rate": 2.5743316496960154e-05, + "loss": 1.3018, + "step": 2021 + }, + { + "epoch": 0.43, + "learning_rate": 2.573026258826838e-05, + "loss": 1.2526, + "step": 2022 + }, + { + "epoch": 0.43, + "learning_rate": 2.571720601956208e-05, + "loss": 1.2777, + "step": 2023 + }, + { + "epoch": 0.43, + "learning_rate": 2.570414679690218e-05, + "loss": 1.2883, + "step": 2024 + }, + { + "epoch": 0.43, + "learning_rate": 2.5691084926350825e-05, + "loss": 1.2386, + "step": 2025 + }, + { + "epoch": 0.43, + "learning_rate": 2.56780204139714e-05, + "loss": 1.2968, + "step": 2026 + }, + { + "epoch": 0.43, + "learning_rate": 2.5664953265828504e-05, + "loss": 1.2461, + "step": 2027 + }, + { + "epoch": 0.43, + "learning_rate": 2.565188348798798e-05, + "loss": 1.2403, + "step": 2028 + }, + { + "epoch": 0.43, + "learning_rate": 2.5638811086516873e-05, + "loss": 1.2555, + "step": 2029 + }, + { + "epoch": 0.43, + "learning_rate": 2.562573606748345e-05, + "loss": 1.2388, + "step": 2030 + }, + { + "epoch": 0.43, + "learning_rate": 2.5612658436957204e-05, + "loss": 1.2985, + "step": 2031 + }, + { + "epoch": 0.43, + "learning_rate": 2.5599578201008824e-05, + "loss": 1.3127, + "step": 2032 + }, + { + "epoch": 0.43, + "learning_rate": 2.5586495365710225e-05, + "loss": 1.297, + "step": 2033 + }, + { + "epoch": 0.43, + "learning_rate": 2.5573409937134508e-05, + "loss": 1.2838, + "step": 2034 + }, + { + "epoch": 0.43, + "learning_rate": 2.5560321921355996e-05, + "loss": 1.2908, + "step": 2035 + }, + { + "epoch": 0.43, + "learning_rate": 2.554723132445021e-05, + "loss": 1.2932, + "step": 2036 + }, + { + "epoch": 0.43, + "learning_rate": 2.5534138152493863e-05, + "loss": 1.2327, + "step": 2037 + }, + { + "epoch": 0.43, + "learning_rate": 2.5521042411564866e-05, + "loss": 1.2842, + "step": 2038 + }, + { + "epoch": 0.43, + "learning_rate": 2.5507944107742314e-05, + "loss": 1.2342, + "step": 2039 + }, + { + "epoch": 0.43, + "learning_rate": 2.549484324710652e-05, + "loss": 1.3055, + "step": 2040 + }, + { + "epoch": 0.43, + "learning_rate": 2.548173983573895e-05, + "loss": 1.2898, + "step": 2041 + }, + { + "epoch": 0.43, + "learning_rate": 2.5468633879722272e-05, + "loss": 1.2622, + "step": 2042 + }, + { + "epoch": 0.43, + "learning_rate": 2.545552538514033e-05, + "loss": 1.3072, + "step": 2043 + }, + { + "epoch": 0.43, + "learning_rate": 2.5442414358078148e-05, + "loss": 1.2473, + "step": 2044 + }, + { + "epoch": 0.43, + "learning_rate": 2.5429300804621934e-05, + "loss": 1.2588, + "step": 2045 + }, + { + "epoch": 0.43, + "learning_rate": 2.541618473085905e-05, + "loss": 1.293, + "step": 2046 + }, + { + "epoch": 0.43, + "learning_rate": 2.5403066142878047e-05, + "loss": 1.2812, + "step": 2047 + }, + { + "epoch": 0.43, + "learning_rate": 2.538994504676862e-05, + "loss": 1.2819, + "step": 2048 + }, + { + "epoch": 0.43, + "learning_rate": 2.537682144862166e-05, + "loss": 1.2955, + "step": 2049 + }, + { + "epoch": 0.43, + "learning_rate": 2.53636953545292e-05, + "loss": 1.2705, + "step": 2050 + }, + { + "epoch": 0.43, + "learning_rate": 2.5350566770584423e-05, + "loss": 1.303, + "step": 2051 + }, + { + "epoch": 0.43, + "learning_rate": 2.5337435702881683e-05, + "loss": 1.2637, + "step": 2052 + }, + { + "epoch": 0.43, + "learning_rate": 2.5324302157516486e-05, + "loss": 1.2481, + "step": 2053 + }, + { + "epoch": 0.43, + "learning_rate": 2.531116614058548e-05, + "loss": 1.3105, + "step": 2054 + }, + { + "epoch": 0.43, + "learning_rate": 2.5298027658186472e-05, + "loss": 1.2549, + "step": 2055 + }, + { + "epoch": 0.43, + "learning_rate": 2.52848867164184e-05, + "loss": 1.2724, + "step": 2056 + }, + { + "epoch": 0.43, + "learning_rate": 2.5271743321381354e-05, + "loss": 1.2496, + "step": 2057 + }, + { + "epoch": 0.43, + "learning_rate": 2.525859747917656e-05, + "loss": 1.2614, + "step": 2058 + }, + { + "epoch": 0.43, + "learning_rate": 2.524544919590638e-05, + "loss": 1.2755, + "step": 2059 + }, + { + "epoch": 0.43, + "learning_rate": 2.5232298477674297e-05, + "loss": 1.2589, + "step": 2060 + }, + { + "epoch": 0.43, + "learning_rate": 2.5219145330584945e-05, + "loss": 1.2813, + "step": 2061 + }, + { + "epoch": 0.43, + "learning_rate": 2.5205989760744084e-05, + "loss": 1.2259, + "step": 2062 + }, + { + "epoch": 0.43, + "learning_rate": 2.5192831774258575e-05, + "loss": 1.2194, + "step": 2063 + }, + { + "epoch": 0.43, + "learning_rate": 2.5179671377236422e-05, + "loss": 1.2996, + "step": 2064 + }, + { + "epoch": 0.43, + "learning_rate": 2.516650857578674e-05, + "loss": 1.2554, + "step": 2065 + }, + { + "epoch": 0.43, + "learning_rate": 2.515334337601977e-05, + "loss": 1.2868, + "step": 2066 + }, + { + "epoch": 0.43, + "learning_rate": 2.5140175784046858e-05, + "loss": 1.2312, + "step": 2067 + }, + { + "epoch": 0.43, + "learning_rate": 2.512700580598045e-05, + "loss": 1.3, + "step": 2068 + }, + { + "epoch": 0.44, + "learning_rate": 2.5113833447934126e-05, + "loss": 1.3146, + "step": 2069 + }, + { + "epoch": 0.44, + "learning_rate": 2.510065871602255e-05, + "loss": 1.2799, + "step": 2070 + }, + { + "epoch": 0.44, + "learning_rate": 2.5087481616361493e-05, + "loss": 1.2765, + "step": 2071 + }, + { + "epoch": 0.44, + "learning_rate": 2.5074302155067823e-05, + "loss": 1.2673, + "step": 2072 + }, + { + "epoch": 0.44, + "learning_rate": 2.5061120338259512e-05, + "loss": 1.2831, + "step": 2073 + }, + { + "epoch": 0.44, + "learning_rate": 2.5047936172055613e-05, + "loss": 1.2831, + "step": 2074 + }, + { + "epoch": 0.44, + "learning_rate": 2.5034749662576293e-05, + "loss": 1.3215, + "step": 2075 + }, + { + "epoch": 0.44, + "learning_rate": 2.5021560815942777e-05, + "loss": 1.2723, + "step": 2076 + }, + { + "epoch": 0.44, + "learning_rate": 2.5008369638277382e-05, + "loss": 1.2905, + "step": 2077 + }, + { + "epoch": 0.44, + "learning_rate": 2.4995176135703533e-05, + "loss": 1.2612, + "step": 2078 + }, + { + "epoch": 0.44, + "learning_rate": 2.498198031434571e-05, + "loss": 1.3296, + "step": 2079 + }, + { + "epoch": 0.44, + "learning_rate": 2.496878218032947e-05, + "loss": 1.2701, + "step": 2080 + }, + { + "epoch": 0.44, + "learning_rate": 2.495558173978145e-05, + "loss": 1.2911, + "step": 2081 + }, + { + "epoch": 0.44, + "learning_rate": 2.494237899882935e-05, + "loss": 1.3086, + "step": 2082 + }, + { + "epoch": 0.44, + "learning_rate": 2.4929173963601958e-05, + "loss": 1.2568, + "step": 2083 + }, + { + "epoch": 0.44, + "learning_rate": 2.4915966640229098e-05, + "loss": 1.2638, + "step": 2084 + }, + { + "epoch": 0.44, + "learning_rate": 2.4902757034841674e-05, + "loss": 1.2551, + "step": 2085 + }, + { + "epoch": 0.44, + "learning_rate": 2.4889545153571657e-05, + "loss": 1.3159, + "step": 2086 + }, + { + "epoch": 0.44, + "learning_rate": 2.4876331002552055e-05, + "loss": 1.2429, + "step": 2087 + }, + { + "epoch": 0.44, + "learning_rate": 2.4863114587916933e-05, + "loss": 1.2593, + "step": 2088 + }, + { + "epoch": 0.44, + "learning_rate": 2.484989591580142e-05, + "loss": 1.29, + "step": 2089 + }, + { + "epoch": 0.44, + "learning_rate": 2.4836674992341684e-05, + "loss": 1.2493, + "step": 2090 + }, + { + "epoch": 0.44, + "learning_rate": 2.4823451823674943e-05, + "loss": 1.2658, + "step": 2091 + }, + { + "epoch": 0.44, + "learning_rate": 2.481022641593944e-05, + "loss": 1.2678, + "step": 2092 + }, + { + "epoch": 0.44, + "learning_rate": 2.4796998775274482e-05, + "loss": 1.2909, + "step": 2093 + }, + { + "epoch": 0.44, + "learning_rate": 2.4783768907820403e-05, + "loss": 1.3031, + "step": 2094 + }, + { + "epoch": 0.44, + "learning_rate": 2.4770536819718562e-05, + "loss": 1.2859, + "step": 2095 + }, + { + "epoch": 0.44, + "learning_rate": 2.475730251711136e-05, + "loss": 1.2783, + "step": 2096 + }, + { + "epoch": 0.44, + "learning_rate": 2.4744066006142218e-05, + "loss": 1.2627, + "step": 2097 + }, + { + "epoch": 0.44, + "learning_rate": 2.4730827292955592e-05, + "loss": 1.2579, + "step": 2098 + }, + { + "epoch": 0.44, + "learning_rate": 2.4717586383696947e-05, + "loss": 1.2372, + "step": 2099 + }, + { + "epoch": 0.44, + "learning_rate": 2.470434328451278e-05, + "loss": 1.2901, + "step": 2100 + }, + { + "epoch": 0.44, + "learning_rate": 2.4691098001550588e-05, + "loss": 1.2395, + "step": 2101 + }, + { + "epoch": 0.44, + "learning_rate": 2.4677850540958906e-05, + "loss": 1.2388, + "step": 2102 + }, + { + "epoch": 0.44, + "learning_rate": 2.4664600908887272e-05, + "loss": 1.2986, + "step": 2103 + }, + { + "epoch": 0.44, + "learning_rate": 2.4651349111486212e-05, + "loss": 1.3011, + "step": 2104 + }, + { + "epoch": 0.44, + "learning_rate": 2.4638095154907276e-05, + "loss": 1.3088, + "step": 2105 + }, + { + "epoch": 0.44, + "learning_rate": 2.4624839045303014e-05, + "loss": 1.3081, + "step": 2106 + }, + { + "epoch": 0.44, + "learning_rate": 2.4611580788826973e-05, + "loss": 1.2722, + "step": 2107 + }, + { + "epoch": 0.44, + "learning_rate": 2.4598320391633702e-05, + "loss": 1.2768, + "step": 2108 + }, + { + "epoch": 0.44, + "learning_rate": 2.4585057859878732e-05, + "loss": 1.3224, + "step": 2109 + }, + { + "epoch": 0.44, + "learning_rate": 2.4571793199718593e-05, + "loss": 1.3039, + "step": 2110 + }, + { + "epoch": 0.44, + "learning_rate": 2.4558526417310805e-05, + "loss": 1.2917, + "step": 2111 + }, + { + "epoch": 0.44, + "learning_rate": 2.4545257518813866e-05, + "loss": 1.2374, + "step": 2112 + }, + { + "epoch": 0.44, + "learning_rate": 2.4531986510387268e-05, + "loss": 1.2449, + "step": 2113 + }, + { + "epoch": 0.44, + "learning_rate": 2.4518713398191464e-05, + "loss": 1.2528, + "step": 2114 + }, + { + "epoch": 0.44, + "learning_rate": 2.45054381883879e-05, + "loss": 1.2509, + "step": 2115 + }, + { + "epoch": 0.45, + "learning_rate": 2.4492160887138998e-05, + "loss": 1.2794, + "step": 2116 + }, + { + "epoch": 0.45, + "learning_rate": 2.447888150060813e-05, + "loss": 1.2612, + "step": 2117 + }, + { + "epoch": 0.45, + "learning_rate": 2.4465600034959654e-05, + "loss": 1.2718, + "step": 2118 + }, + { + "epoch": 0.45, + "learning_rate": 2.4452316496358885e-05, + "loss": 1.313, + "step": 2119 + }, + { + "epoch": 0.45, + "learning_rate": 2.443903089097211e-05, + "loss": 1.2596, + "step": 2120 + }, + { + "epoch": 0.45, + "learning_rate": 2.4425743224966567e-05, + "loss": 1.267, + "step": 2121 + }, + { + "epoch": 0.45, + "learning_rate": 2.4412453504510447e-05, + "loss": 1.2455, + "step": 2122 + }, + { + "epoch": 0.45, + "learning_rate": 2.43991617357729e-05, + "loss": 1.3376, + "step": 2123 + }, + { + "epoch": 0.45, + "learning_rate": 2.4385867924924037e-05, + "loss": 1.3297, + "step": 2124 + }, + { + "epoch": 0.45, + "learning_rate": 2.43725720781349e-05, + "loss": 1.2579, + "step": 2125 + }, + { + "epoch": 0.45, + "learning_rate": 2.4359274201577478e-05, + "loss": 1.3401, + "step": 2126 + }, + { + "epoch": 0.45, + "learning_rate": 2.4345974301424717e-05, + "loss": 1.2648, + "step": 2127 + }, + { + "epoch": 0.45, + "learning_rate": 2.433267238385048e-05, + "loss": 1.2432, + "step": 2128 + }, + { + "epoch": 0.45, + "learning_rate": 2.4319368455029598e-05, + "loss": 1.2588, + "step": 2129 + }, + { + "epoch": 0.45, + "learning_rate": 2.43060625211378e-05, + "loss": 1.288, + "step": 2130 + }, + { + "epoch": 0.45, + "learning_rate": 2.4292754588351768e-05, + "loss": 1.2282, + "step": 2131 + }, + { + "epoch": 0.45, + "learning_rate": 2.42794446628491e-05, + "loss": 1.293, + "step": 2132 + }, + { + "epoch": 0.45, + "learning_rate": 2.426613275080834e-05, + "loss": 1.307, + "step": 2133 + }, + { + "epoch": 0.45, + "learning_rate": 2.4252818858408923e-05, + "loss": 1.3079, + "step": 2134 + }, + { + "epoch": 0.45, + "learning_rate": 2.4239502991831233e-05, + "loss": 1.284, + "step": 2135 + }, + { + "epoch": 0.45, + "learning_rate": 2.4226185157256546e-05, + "loss": 1.2037, + "step": 2136 + }, + { + "epoch": 0.45, + "learning_rate": 2.421286536086707e-05, + "loss": 1.2519, + "step": 2137 + }, + { + "epoch": 0.45, + "learning_rate": 2.4199543608845916e-05, + "loss": 1.2597, + "step": 2138 + }, + { + "epoch": 0.45, + "learning_rate": 2.4186219907377097e-05, + "loss": 1.2732, + "step": 2139 + }, + { + "epoch": 0.45, + "learning_rate": 2.4172894262645544e-05, + "loss": 1.3214, + "step": 2140 + }, + { + "epoch": 0.45, + "learning_rate": 2.4159566680837086e-05, + "loss": 1.278, + "step": 2141 + }, + { + "epoch": 0.45, + "learning_rate": 2.414623716813844e-05, + "loss": 1.2699, + "step": 2142 + }, + { + "epoch": 0.45, + "learning_rate": 2.413290573073723e-05, + "loss": 1.3015, + "step": 2143 + }, + { + "epoch": 0.45, + "learning_rate": 2.4119572374821968e-05, + "loss": 1.2712, + "step": 2144 + }, + { + "epoch": 0.45, + "learning_rate": 2.4106237106582072e-05, + "loss": 1.2545, + "step": 2145 + }, + { + "epoch": 0.45, + "learning_rate": 2.4092899932207824e-05, + "loss": 1.2726, + "step": 2146 + }, + { + "epoch": 0.45, + "learning_rate": 2.4079560857890405e-05, + "loss": 1.2493, + "step": 2147 + }, + { + "epoch": 0.45, + "learning_rate": 2.406621988982188e-05, + "loss": 1.2737, + "step": 2148 + }, + { + "epoch": 0.45, + "learning_rate": 2.405287703419518e-05, + "loss": 1.2957, + "step": 2149 + }, + { + "epoch": 0.45, + "learning_rate": 2.4039532297204125e-05, + "loss": 1.2383, + "step": 2150 + }, + { + "epoch": 0.45, + "learning_rate": 2.4026185685043405e-05, + "loss": 1.2334, + "step": 2151 + }, + { + "epoch": 0.45, + "learning_rate": 2.4012837203908582e-05, + "loss": 1.2657, + "step": 2152 + }, + { + "epoch": 0.45, + "learning_rate": 2.3999486859996073e-05, + "loss": 1.2939, + "step": 2153 + }, + { + "epoch": 0.45, + "learning_rate": 2.3986134659503187e-05, + "loss": 1.246, + "step": 2154 + }, + { + "epoch": 0.45, + "learning_rate": 2.3972780608628057e-05, + "loss": 1.3431, + "step": 2155 + }, + { + "epoch": 0.45, + "learning_rate": 2.3959424713569708e-05, + "loss": 1.2937, + "step": 2156 + }, + { + "epoch": 0.45, + "learning_rate": 2.3946066980528e-05, + "loss": 1.2899, + "step": 2157 + }, + { + "epoch": 0.45, + "learning_rate": 2.3932707415703673e-05, + "loss": 1.2751, + "step": 2158 + }, + { + "epoch": 0.45, + "learning_rate": 2.391934602529828e-05, + "loss": 1.2325, + "step": 2159 + }, + { + "epoch": 0.45, + "learning_rate": 2.3905982815514243e-05, + "loss": 1.2628, + "step": 2160 + }, + { + "epoch": 0.45, + "learning_rate": 2.3892617792554833e-05, + "loss": 1.2681, + "step": 2161 + }, + { + "epoch": 0.45, + "learning_rate": 2.3879250962624152e-05, + "loss": 1.29, + "step": 2162 + }, + { + "epoch": 0.45, + "learning_rate": 2.386588233192715e-05, + "loss": 1.2623, + "step": 2163 + }, + { + "epoch": 0.46, + "learning_rate": 2.38525119066696e-05, + "loss": 1.29, + "step": 2164 + }, + { + "epoch": 0.46, + "learning_rate": 2.3839139693058116e-05, + "loss": 1.268, + "step": 2165 + }, + { + "epoch": 0.46, + "learning_rate": 2.382576569730015e-05, + "loss": 1.2672, + "step": 2166 + }, + { + "epoch": 0.46, + "learning_rate": 2.3812389925603963e-05, + "loss": 1.226, + "step": 2167 + }, + { + "epoch": 0.46, + "learning_rate": 2.3799012384178654e-05, + "loss": 1.2533, + "step": 2168 + }, + { + "epoch": 0.46, + "learning_rate": 2.3785633079234144e-05, + "loss": 1.2878, + "step": 2169 + }, + { + "epoch": 0.46, + "learning_rate": 2.377225201698117e-05, + "loss": 1.2776, + "step": 2170 + }, + { + "epoch": 0.46, + "learning_rate": 2.3758869203631266e-05, + "loss": 1.3176, + "step": 2171 + }, + { + "epoch": 0.46, + "learning_rate": 2.3745484645396816e-05, + "loss": 1.2473, + "step": 2172 + }, + { + "epoch": 0.46, + "learning_rate": 2.373209834849098e-05, + "loss": 1.2396, + "step": 2173 + }, + { + "epoch": 0.46, + "learning_rate": 2.3718710319127755e-05, + "loss": 1.2944, + "step": 2174 + }, + { + "epoch": 0.46, + "learning_rate": 2.370532056352191e-05, + "loss": 1.2887, + "step": 2175 + }, + { + "epoch": 0.46, + "learning_rate": 2.3691929087889042e-05, + "loss": 1.2868, + "step": 2176 + }, + { + "epoch": 0.46, + "learning_rate": 2.3678535898445533e-05, + "loss": 1.2609, + "step": 2177 + }, + { + "epoch": 0.46, + "learning_rate": 2.3665141001408562e-05, + "loss": 1.2767, + "step": 2178 + }, + { + "epoch": 0.46, + "learning_rate": 2.3651744402996114e-05, + "loss": 1.2674, + "step": 2179 + }, + { + "epoch": 0.46, + "learning_rate": 2.3638346109426932e-05, + "loss": 1.2839, + "step": 2180 + }, + { + "epoch": 0.46, + "learning_rate": 2.362494612692058e-05, + "loss": 1.3094, + "step": 2181 + }, + { + "epoch": 0.46, + "learning_rate": 2.361154446169739e-05, + "loss": 1.2948, + "step": 2182 + }, + { + "epoch": 0.46, + "learning_rate": 2.3598141119978482e-05, + "loss": 1.3063, + "step": 2183 + }, + { + "epoch": 0.46, + "learning_rate": 2.3584736107985737e-05, + "loss": 1.2767, + "step": 2184 + }, + { + "epoch": 0.46, + "learning_rate": 2.357132943194183e-05, + "loss": 1.2409, + "step": 2185 + }, + { + "epoch": 0.46, + "learning_rate": 2.35579210980702e-05, + "loss": 1.3158, + "step": 2186 + }, + { + "epoch": 0.46, + "learning_rate": 2.3544511112595068e-05, + "loss": 1.2893, + "step": 2187 + }, + { + "epoch": 0.46, + "learning_rate": 2.3531099481741403e-05, + "loss": 1.2235, + "step": 2188 + }, + { + "epoch": 0.46, + "learning_rate": 2.351768621173495e-05, + "loss": 1.2892, + "step": 2189 + }, + { + "epoch": 0.46, + "learning_rate": 2.3504271308802204e-05, + "loss": 1.2643, + "step": 2190 + }, + { + "epoch": 0.46, + "learning_rate": 2.3490854779170436e-05, + "loss": 1.293, + "step": 2191 + }, + { + "epoch": 0.46, + "learning_rate": 2.347743662906765e-05, + "loss": 1.2984, + "step": 2192 + }, + { + "epoch": 0.46, + "learning_rate": 2.3464016864722625e-05, + "loss": 1.2651, + "step": 2193 + }, + { + "epoch": 0.46, + "learning_rate": 2.345059549236487e-05, + "loss": 1.2821, + "step": 2194 + }, + { + "epoch": 0.46, + "learning_rate": 2.343717251822465e-05, + "loss": 1.2066, + "step": 2195 + }, + { + "epoch": 0.46, + "learning_rate": 2.3423747948532976e-05, + "loss": 1.2939, + "step": 2196 + }, + { + "epoch": 0.46, + "learning_rate": 2.341032178952159e-05, + "loss": 1.2487, + "step": 2197 + }, + { + "epoch": 0.46, + "learning_rate": 2.339689404742298e-05, + "loss": 1.2796, + "step": 2198 + }, + { + "epoch": 0.46, + "learning_rate": 2.338346472847037e-05, + "loss": 1.2045, + "step": 2199 + }, + { + "epoch": 0.46, + "learning_rate": 2.3370033838897702e-05, + "loss": 1.2245, + "step": 2200 + }, + { + "epoch": 0.46, + "learning_rate": 2.3356601384939665e-05, + "loss": 1.3075, + "step": 2201 + }, + { + "epoch": 0.46, + "learning_rate": 2.3343167372831665e-05, + "loss": 1.309, + "step": 2202 + }, + { + "epoch": 0.46, + "learning_rate": 2.3329731808809836e-05, + "loss": 1.3084, + "step": 2203 + }, + { + "epoch": 0.46, + "learning_rate": 2.331629469911103e-05, + "loss": 1.2633, + "step": 2204 + }, + { + "epoch": 0.46, + "learning_rate": 2.330285604997281e-05, + "loss": 1.2671, + "step": 2205 + }, + { + "epoch": 0.46, + "learning_rate": 2.328941586763346e-05, + "loss": 1.2656, + "step": 2206 + }, + { + "epoch": 0.46, + "learning_rate": 2.3275974158331977e-05, + "loss": 1.2829, + "step": 2207 + }, + { + "epoch": 0.46, + "learning_rate": 2.3262530928308068e-05, + "loss": 1.2457, + "step": 2208 + }, + { + "epoch": 0.46, + "learning_rate": 2.3249086183802137e-05, + "loss": 1.2854, + "step": 2209 + }, + { + "epoch": 0.46, + "learning_rate": 2.32356399310553e-05, + "loss": 1.2401, + "step": 2210 + }, + { + "epoch": 0.47, + "learning_rate": 2.3222192176309367e-05, + "loss": 1.2986, + "step": 2211 + }, + { + "epoch": 0.47, + "learning_rate": 2.320874292580685e-05, + "loss": 1.3068, + "step": 2212 + }, + { + "epoch": 0.47, + "learning_rate": 2.3195292185790957e-05, + "loss": 1.3193, + "step": 2213 + }, + { + "epoch": 0.47, + "learning_rate": 2.318183996250558e-05, + "loss": 1.2133, + "step": 2214 + }, + { + "epoch": 0.47, + "learning_rate": 2.3168386262195307e-05, + "loss": 1.2821, + "step": 2215 + }, + { + "epoch": 0.47, + "learning_rate": 2.315493109110541e-05, + "loss": 1.2341, + "step": 2216 + }, + { + "epoch": 0.47, + "learning_rate": 2.314147445548183e-05, + "loss": 1.2578, + "step": 2217 + }, + { + "epoch": 0.47, + "learning_rate": 2.3128016361571213e-05, + "loss": 1.3141, + "step": 2218 + }, + { + "epoch": 0.47, + "learning_rate": 2.3114556815620863e-05, + "loss": 1.3132, + "step": 2219 + }, + { + "epoch": 0.47, + "learning_rate": 2.3101095823878764e-05, + "loss": 1.3046, + "step": 2220 + }, + { + "epoch": 0.47, + "learning_rate": 2.308763339259357e-05, + "loss": 1.2741, + "step": 2221 + }, + { + "epoch": 0.47, + "learning_rate": 2.3074169528014605e-05, + "loss": 1.2767, + "step": 2222 + }, + { + "epoch": 0.47, + "learning_rate": 2.306070423639186e-05, + "loss": 1.264, + "step": 2223 + }, + { + "epoch": 0.47, + "learning_rate": 2.3047237523975984e-05, + "loss": 1.263, + "step": 2224 + }, + { + "epoch": 0.47, + "learning_rate": 2.3033769397018286e-05, + "loss": 1.2069, + "step": 2225 + }, + { + "epoch": 0.47, + "learning_rate": 2.3020299861770732e-05, + "loss": 1.2284, + "step": 2226 + }, + { + "epoch": 0.47, + "learning_rate": 2.300682892448595e-05, + "loss": 1.2452, + "step": 2227 + }, + { + "epoch": 0.47, + "learning_rate": 2.2993356591417203e-05, + "loss": 1.2717, + "step": 2228 + }, + { + "epoch": 0.47, + "learning_rate": 2.2979882868818422e-05, + "loss": 1.263, + "step": 2229 + }, + { + "epoch": 0.47, + "learning_rate": 2.296640776294416e-05, + "loss": 1.2966, + "step": 2230 + }, + { + "epoch": 0.47, + "learning_rate": 2.2952931280049628e-05, + "loss": 1.2989, + "step": 2231 + }, + { + "epoch": 0.47, + "learning_rate": 2.293945342639067e-05, + "loss": 1.2976, + "step": 2232 + }, + { + "epoch": 0.47, + "learning_rate": 2.2925974208223778e-05, + "loss": 1.27, + "step": 2233 + }, + { + "epoch": 0.47, + "learning_rate": 2.2912493631806055e-05, + "loss": 1.2463, + "step": 2234 + }, + { + "epoch": 0.47, + "learning_rate": 2.2899011703395254e-05, + "loss": 1.2726, + "step": 2235 + }, + { + "epoch": 0.47, + "learning_rate": 2.288552842924974e-05, + "loss": 1.2412, + "step": 2236 + }, + { + "epoch": 0.47, + "learning_rate": 2.2872043815628525e-05, + "loss": 1.2547, + "step": 2237 + }, + { + "epoch": 0.47, + "learning_rate": 2.2858557868791222e-05, + "loss": 1.289, + "step": 2238 + }, + { + "epoch": 0.47, + "learning_rate": 2.284507059499807e-05, + "loss": 1.2979, + "step": 2239 + }, + { + "epoch": 0.47, + "learning_rate": 2.283158200050993e-05, + "loss": 1.2521, + "step": 2240 + }, + { + "epoch": 0.47, + "learning_rate": 2.2818092091588266e-05, + "loss": 1.2675, + "step": 2241 + }, + { + "epoch": 0.47, + "learning_rate": 2.280460087449515e-05, + "loss": 1.2783, + "step": 2242 + }, + { + "epoch": 0.47, + "learning_rate": 2.2791108355493278e-05, + "loss": 1.2532, + "step": 2243 + }, + { + "epoch": 0.47, + "learning_rate": 2.2777614540845934e-05, + "loss": 1.2963, + "step": 2244 + }, + { + "epoch": 0.47, + "learning_rate": 2.2764119436817015e-05, + "loss": 1.2672, + "step": 2245 + }, + { + "epoch": 0.47, + "learning_rate": 2.2750623049671003e-05, + "loss": 1.2974, + "step": 2246 + }, + { + "epoch": 0.47, + "learning_rate": 2.273712538567299e-05, + "loss": 1.2806, + "step": 2247 + }, + { + "epoch": 0.47, + "learning_rate": 2.2723626451088644e-05, + "loss": 1.3214, + "step": 2248 + }, + { + "epoch": 0.47, + "learning_rate": 2.2710126252184255e-05, + "loss": 1.307, + "step": 2249 + }, + { + "epoch": 0.47, + "learning_rate": 2.2696624795226662e-05, + "loss": 1.2484, + "step": 2250 + }, + { + "epoch": 0.47, + "learning_rate": 2.2683122086483297e-05, + "loss": 1.24, + "step": 2251 + }, + { + "epoch": 0.47, + "learning_rate": 2.26696181322222e-05, + "loss": 1.2795, + "step": 2252 + }, + { + "epoch": 0.47, + "learning_rate": 2.2656112938711952e-05, + "loss": 1.2912, + "step": 2253 + }, + { + "epoch": 0.47, + "learning_rate": 2.264260651222174e-05, + "loss": 1.2853, + "step": 2254 + }, + { + "epoch": 0.47, + "learning_rate": 2.26290988590213e-05, + "loss": 1.2958, + "step": 2255 + }, + { + "epoch": 0.47, + "learning_rate": 2.261558998538095e-05, + "loss": 1.2503, + "step": 2256 + }, + { + "epoch": 0.47, + "learning_rate": 2.2602079897571576e-05, + "loss": 1.295, + "step": 2257 + }, + { + "epoch": 0.47, + "learning_rate": 2.258856860186462e-05, + "loss": 1.269, + "step": 2258 + }, + { + "epoch": 0.48, + "learning_rate": 2.257505610453209e-05, + "loss": 1.2443, + "step": 2259 + }, + { + "epoch": 0.48, + "learning_rate": 2.2561542411846537e-05, + "loss": 1.3001, + "step": 2260 + }, + { + "epoch": 0.48, + "learning_rate": 2.25480275300811e-05, + "loss": 1.2766, + "step": 2261 + }, + { + "epoch": 0.48, + "learning_rate": 2.253451146550945e-05, + "loss": 1.3115, + "step": 2262 + }, + { + "epoch": 0.48, + "learning_rate": 2.252099422440579e-05, + "loss": 1.3036, + "step": 2263 + }, + { + "epoch": 0.48, + "learning_rate": 2.2507475813044896e-05, + "loss": 1.3008, + "step": 2264 + }, + { + "epoch": 0.48, + "learning_rate": 2.2493956237702075e-05, + "loss": 1.2758, + "step": 2265 + }, + { + "epoch": 0.48, + "learning_rate": 2.2480435504653185e-05, + "loss": 1.2717, + "step": 2266 + }, + { + "epoch": 0.48, + "learning_rate": 2.24669136201746e-05, + "loss": 1.2827, + "step": 2267 + }, + { + "epoch": 0.48, + "learning_rate": 2.2453390590543246e-05, + "loss": 1.2494, + "step": 2268 + }, + { + "epoch": 0.48, + "learning_rate": 2.243986642203658e-05, + "loss": 1.2719, + "step": 2269 + }, + { + "epoch": 0.48, + "learning_rate": 2.2426341120932582e-05, + "loss": 1.2765, + "step": 2270 + }, + { + "epoch": 0.48, + "learning_rate": 2.241281469350976e-05, + "loss": 1.2572, + "step": 2271 + }, + { + "epoch": 0.48, + "learning_rate": 2.2399287146047137e-05, + "loss": 1.2562, + "step": 2272 + }, + { + "epoch": 0.48, + "learning_rate": 2.2385758484824275e-05, + "loss": 1.2728, + "step": 2273 + }, + { + "epoch": 0.48, + "learning_rate": 2.2372228716121246e-05, + "loss": 1.3079, + "step": 2274 + }, + { + "epoch": 0.48, + "learning_rate": 2.235869784621861e-05, + "loss": 1.2781, + "step": 2275 + }, + { + "epoch": 0.48, + "learning_rate": 2.2345165881397475e-05, + "loss": 1.298, + "step": 2276 + }, + { + "epoch": 0.48, + "learning_rate": 2.2331632827939438e-05, + "loss": 1.2634, + "step": 2277 + }, + { + "epoch": 0.48, + "learning_rate": 2.231809869212661e-05, + "loss": 1.2548, + "step": 2278 + }, + { + "epoch": 0.48, + "learning_rate": 2.230456348024159e-05, + "loss": 1.2609, + "step": 2279 + }, + { + "epoch": 0.48, + "learning_rate": 2.2291027198567502e-05, + "loss": 1.304, + "step": 2280 + }, + { + "epoch": 0.48, + "learning_rate": 2.2277489853387932e-05, + "loss": 1.2894, + "step": 2281 + }, + { + "epoch": 0.48, + "learning_rate": 2.2263951450986987e-05, + "loss": 1.2655, + "step": 2282 + }, + { + "epoch": 0.48, + "learning_rate": 2.2250411997649266e-05, + "loss": 1.2448, + "step": 2283 + }, + { + "epoch": 0.48, + "learning_rate": 2.2236871499659824e-05, + "loss": 1.3025, + "step": 2284 + }, + { + "epoch": 0.48, + "learning_rate": 2.2223329963304242e-05, + "loss": 1.2805, + "step": 2285 + }, + { + "epoch": 0.48, + "learning_rate": 2.2209787394868562e-05, + "loss": 1.2748, + "step": 2286 + }, + { + "epoch": 0.48, + "learning_rate": 2.2196243800639303e-05, + "loss": 1.2832, + "step": 2287 + }, + { + "epoch": 0.48, + "learning_rate": 2.2182699186903462e-05, + "loss": 1.2327, + "step": 2288 + }, + { + "epoch": 0.48, + "learning_rate": 2.2169153559948513e-05, + "loss": 1.303, + "step": 2289 + }, + { + "epoch": 0.48, + "learning_rate": 2.21556069260624e-05, + "loss": 1.2921, + "step": 2290 + }, + { + "epoch": 0.48, + "learning_rate": 2.2142059291533542e-05, + "loss": 1.2519, + "step": 2291 + }, + { + "epoch": 0.48, + "learning_rate": 2.2128510662650796e-05, + "loss": 1.2517, + "step": 2292 + }, + { + "epoch": 0.48, + "learning_rate": 2.211496104570351e-05, + "loss": 1.2842, + "step": 2293 + }, + { + "epoch": 0.48, + "learning_rate": 2.210141044698148e-05, + "loss": 1.2987, + "step": 2294 + }, + { + "epoch": 0.48, + "learning_rate": 2.2087858872774954e-05, + "loss": 1.2548, + "step": 2295 + }, + { + "epoch": 0.48, + "learning_rate": 2.2074306329374636e-05, + "loss": 1.3158, + "step": 2296 + }, + { + "epoch": 0.48, + "learning_rate": 2.206075282307168e-05, + "loss": 1.2867, + "step": 2297 + }, + { + "epoch": 0.48, + "learning_rate": 2.2047198360157683e-05, + "loss": 1.282, + "step": 2298 + }, + { + "epoch": 0.48, + "learning_rate": 2.2033642946924698e-05, + "loss": 1.2701, + "step": 2299 + }, + { + "epoch": 0.48, + "learning_rate": 2.2020086589665203e-05, + "loss": 1.23, + "step": 2300 + }, + { + "epoch": 0.48, + "learning_rate": 2.2006529294672126e-05, + "loss": 1.2628, + "step": 2301 + }, + { + "epoch": 0.48, + "learning_rate": 2.1992971068238826e-05, + "loss": 1.2468, + "step": 2302 + }, + { + "epoch": 0.48, + "learning_rate": 2.197941191665909e-05, + "loss": 1.269, + "step": 2303 + }, + { + "epoch": 0.48, + "learning_rate": 2.196585184622715e-05, + "loss": 1.2319, + "step": 2304 + }, + { + "epoch": 0.48, + "learning_rate": 2.195229086323764e-05, + "loss": 1.2721, + "step": 2305 + }, + { + "epoch": 0.49, + "learning_rate": 2.193872897398564e-05, + "loss": 1.2655, + "step": 2306 + }, + { + "epoch": 0.49, + "learning_rate": 2.1925166184766636e-05, + "loss": 1.2863, + "step": 2307 + }, + { + "epoch": 0.49, + "learning_rate": 2.1911602501876546e-05, + "loss": 1.2351, + "step": 2308 + }, + { + "epoch": 0.49, + "learning_rate": 2.1898037931611688e-05, + "loss": 1.2753, + "step": 2309 + }, + { + "epoch": 0.49, + "learning_rate": 2.1884472480268806e-05, + "loss": 1.2352, + "step": 2310 + }, + { + "epoch": 0.49, + "learning_rate": 2.1870906154145035e-05, + "loss": 1.2392, + "step": 2311 + }, + { + "epoch": 0.49, + "learning_rate": 2.185733895953794e-05, + "loss": 1.2358, + "step": 2312 + }, + { + "epoch": 0.49, + "learning_rate": 2.1843770902745462e-05, + "loss": 1.283, + "step": 2313 + }, + { + "epoch": 0.49, + "learning_rate": 2.1830201990065966e-05, + "loss": 1.2299, + "step": 2314 + }, + { + "epoch": 0.49, + "learning_rate": 2.1816632227798196e-05, + "loss": 1.3021, + "step": 2315 + }, + { + "epoch": 0.49, + "learning_rate": 2.180306162224131e-05, + "loss": 1.2538, + "step": 2316 + }, + { + "epoch": 0.49, + "learning_rate": 2.1789490179694833e-05, + "loss": 1.2436, + "step": 2317 + }, + { + "epoch": 0.49, + "learning_rate": 2.1775917906458698e-05, + "loss": 1.2869, + "step": 2318 + }, + { + "epoch": 0.49, + "learning_rate": 2.176234480883322e-05, + "loss": 1.2631, + "step": 2319 + }, + { + "epoch": 0.49, + "learning_rate": 2.174877089311909e-05, + "loss": 1.2608, + "step": 2320 + }, + { + "epoch": 0.49, + "learning_rate": 2.1735196165617385e-05, + "loss": 1.1975, + "step": 2321 + }, + { + "epoch": 0.49, + "learning_rate": 2.1721620632629552e-05, + "loss": 1.2808, + "step": 2322 + }, + { + "epoch": 0.49, + "learning_rate": 2.1708044300457423e-05, + "loss": 1.2342, + "step": 2323 + }, + { + "epoch": 0.49, + "learning_rate": 2.1694467175403197e-05, + "loss": 1.3237, + "step": 2324 + }, + { + "epoch": 0.49, + "learning_rate": 2.1680889263769425e-05, + "loss": 1.2427, + "step": 2325 + }, + { + "epoch": 0.49, + "learning_rate": 2.166731057185905e-05, + "loss": 1.2862, + "step": 2326 + }, + { + "epoch": 0.49, + "learning_rate": 2.1653731105975355e-05, + "loss": 1.3005, + "step": 2327 + }, + { + "epoch": 0.49, + "learning_rate": 2.1640150872421997e-05, + "loss": 1.2493, + "step": 2328 + }, + { + "epoch": 0.49, + "learning_rate": 2.1626569877502985e-05, + "loss": 1.2957, + "step": 2329 + }, + { + "epoch": 0.49, + "learning_rate": 2.161298812752267e-05, + "loss": 1.2392, + "step": 2330 + }, + { + "epoch": 0.49, + "learning_rate": 2.1599405628785773e-05, + "loss": 1.247, + "step": 2331 + }, + { + "epoch": 0.49, + "learning_rate": 2.158582238759735e-05, + "loss": 1.3058, + "step": 2332 + }, + { + "epoch": 0.49, + "learning_rate": 2.15722384102628e-05, + "loss": 1.281, + "step": 2333 + }, + { + "epoch": 0.49, + "learning_rate": 2.1558653703087876e-05, + "loss": 1.226, + "step": 2334 + }, + { + "epoch": 0.49, + "learning_rate": 2.1545068272378664e-05, + "loss": 1.308, + "step": 2335 + }, + { + "epoch": 0.49, + "learning_rate": 2.1531482124441574e-05, + "loss": 1.264, + "step": 2336 + }, + { + "epoch": 0.49, + "learning_rate": 2.151789526558337e-05, + "loss": 1.2489, + "step": 2337 + }, + { + "epoch": 0.49, + "learning_rate": 2.1504307702111125e-05, + "loss": 1.2956, + "step": 2338 + }, + { + "epoch": 0.49, + "learning_rate": 2.1490719440332252e-05, + "loss": 1.2503, + "step": 2339 + }, + { + "epoch": 0.49, + "learning_rate": 2.147713048655449e-05, + "loss": 1.2541, + "step": 2340 + }, + { + "epoch": 0.49, + "learning_rate": 2.1463540847085892e-05, + "loss": 1.3193, + "step": 2341 + }, + { + "epoch": 0.49, + "learning_rate": 2.1449950528234828e-05, + "loss": 1.3139, + "step": 2342 + }, + { + "epoch": 0.49, + "learning_rate": 2.143635953630999e-05, + "loss": 1.2783, + "step": 2343 + }, + { + "epoch": 0.49, + "learning_rate": 2.1422767877620382e-05, + "loss": 1.2414, + "step": 2344 + }, + { + "epoch": 0.49, + "learning_rate": 2.1409175558475307e-05, + "loss": 1.2978, + "step": 2345 + }, + { + "epoch": 0.49, + "learning_rate": 2.1395582585184397e-05, + "loss": 1.2643, + "step": 2346 + }, + { + "epoch": 0.49, + "learning_rate": 2.138198896405756e-05, + "loss": 1.2579, + "step": 2347 + }, + { + "epoch": 0.49, + "learning_rate": 2.1368394701405023e-05, + "loss": 1.2258, + "step": 2348 + }, + { + "epoch": 0.49, + "learning_rate": 2.1354799803537312e-05, + "loss": 1.2374, + "step": 2349 + }, + { + "epoch": 0.49, + "learning_rate": 2.134120427676523e-05, + "loss": 1.2371, + "step": 2350 + }, + { + "epoch": 0.49, + "learning_rate": 2.1327608127399895e-05, + "loss": 1.2696, + "step": 2351 + }, + { + "epoch": 0.49, + "learning_rate": 2.1314011361752687e-05, + "loss": 1.2591, + "step": 2352 + }, + { + "epoch": 0.49, + "learning_rate": 2.1300413986135313e-05, + "loss": 1.2424, + "step": 2353 + }, + { + "epoch": 0.5, + "learning_rate": 2.128681600685971e-05, + "loss": 1.3086, + "step": 2354 + }, + { + "epoch": 0.5, + "learning_rate": 2.1273217430238146e-05, + "loss": 1.3011, + "step": 2355 + }, + { + "epoch": 0.5, + "learning_rate": 2.1259618262583122e-05, + "loss": 1.2495, + "step": 2356 + }, + { + "epoch": 0.5, + "learning_rate": 2.1246018510207452e-05, + "loss": 1.2858, + "step": 2357 + }, + { + "epoch": 0.5, + "learning_rate": 2.1232418179424204e-05, + "loss": 1.2344, + "step": 2358 + }, + { + "epoch": 0.5, + "learning_rate": 2.12188172765467e-05, + "loss": 1.221, + "step": 2359 + }, + { + "epoch": 0.5, + "learning_rate": 2.120521580788856e-05, + "loss": 1.2757, + "step": 2360 + }, + { + "epoch": 0.5, + "learning_rate": 2.1191613779763635e-05, + "loss": 1.2317, + "step": 2361 + }, + { + "epoch": 0.5, + "learning_rate": 2.1178011198486064e-05, + "loss": 1.2473, + "step": 2362 + }, + { + "epoch": 0.5, + "learning_rate": 2.1164408070370212e-05, + "loss": 1.276, + "step": 2363 + }, + { + "epoch": 0.5, + "learning_rate": 2.1150804401730724e-05, + "loss": 1.2718, + "step": 2364 + }, + { + "epoch": 0.5, + "learning_rate": 2.1137200198882484e-05, + "loss": 1.2227, + "step": 2365 + }, + { + "epoch": 0.5, + "learning_rate": 2.112359546814063e-05, + "loss": 1.2493, + "step": 2366 + }, + { + "epoch": 0.5, + "learning_rate": 2.110999021582053e-05, + "loss": 1.276, + "step": 2367 + }, + { + "epoch": 0.5, + "learning_rate": 2.1096384448237824e-05, + "loss": 1.2941, + "step": 2368 + }, + { + "epoch": 0.5, + "learning_rate": 2.1082778171708355e-05, + "loss": 1.224, + "step": 2369 + }, + { + "epoch": 0.5, + "learning_rate": 2.1069171392548226e-05, + "loss": 1.2903, + "step": 2370 + }, + { + "epoch": 0.5, + "learning_rate": 2.1055564117073767e-05, + "loss": 1.26, + "step": 2371 + }, + { + "epoch": 0.5, + "learning_rate": 2.1041956351601543e-05, + "loss": 1.2548, + "step": 2372 + }, + { + "epoch": 0.5, + "learning_rate": 2.1028348102448338e-05, + "loss": 1.1928, + "step": 2373 + }, + { + "epoch": 0.5, + "learning_rate": 2.1014739375931166e-05, + "loss": 1.27, + "step": 2374 + }, + { + "epoch": 0.5, + "learning_rate": 2.1001130178367256e-05, + "loss": 1.2655, + "step": 2375 + }, + { + "epoch": 0.5, + "learning_rate": 2.098752051607406e-05, + "loss": 1.2878, + "step": 2376 + }, + { + "epoch": 0.5, + "learning_rate": 2.097391039536926e-05, + "loss": 1.3185, + "step": 2377 + }, + { + "epoch": 0.5, + "learning_rate": 2.0960299822570728e-05, + "loss": 1.2573, + "step": 2378 + }, + { + "epoch": 0.5, + "learning_rate": 2.094668880399655e-05, + "loss": 1.294, + "step": 2379 + }, + { + "epoch": 0.5, + "learning_rate": 2.0933077345965032e-05, + "loss": 1.3151, + "step": 2380 + }, + { + "epoch": 0.5, + "learning_rate": 2.0919465454794672e-05, + "loss": 1.2751, + "step": 2381 + }, + { + "epoch": 0.5, + "learning_rate": 2.0905853136804173e-05, + "loss": 1.2886, + "step": 2382 + }, + { + "epoch": 0.5, + "learning_rate": 2.089224039831244e-05, + "loss": 1.2623, + "step": 2383 + }, + { + "epoch": 0.5, + "learning_rate": 2.087862724563857e-05, + "loss": 1.3, + "step": 2384 + }, + { + "epoch": 0.5, + "learning_rate": 2.0865013685101844e-05, + "loss": 1.2876, + "step": 2385 + }, + { + "epoch": 0.5, + "learning_rate": 2.085139972302175e-05, + "loss": 1.3102, + "step": 2386 + }, + { + "epoch": 0.5, + "learning_rate": 2.083778536571795e-05, + "loss": 1.2676, + "step": 2387 + }, + { + "epoch": 0.5, + "learning_rate": 2.0824170619510283e-05, + "loss": 1.2637, + "step": 2388 + }, + { + "epoch": 0.5, + "learning_rate": 2.0810555490718787e-05, + "loss": 1.2419, + "step": 2389 + }, + { + "epoch": 0.5, + "learning_rate": 2.0796939985663666e-05, + "loss": 1.2756, + "step": 2390 + }, + { + "epoch": 0.5, + "learning_rate": 2.0783324110665306e-05, + "loss": 1.275, + "step": 2391 + }, + { + "epoch": 0.5, + "learning_rate": 2.0769707872044242e-05, + "loss": 1.2722, + "step": 2392 + }, + { + "epoch": 0.5, + "learning_rate": 2.0756091276121212e-05, + "loss": 1.3089, + "step": 2393 + }, + { + "epoch": 0.5, + "learning_rate": 2.0742474329217094e-05, + "loss": 1.314, + "step": 2394 + }, + { + "epoch": 0.5, + "learning_rate": 2.0728857037652945e-05, + "loss": 1.2922, + "step": 2395 + }, + { + "epoch": 0.5, + "learning_rate": 2.0715239407749973e-05, + "loss": 1.2915, + "step": 2396 + }, + { + "epoch": 0.5, + "learning_rate": 2.070162144582954e-05, + "loss": 1.2634, + "step": 2397 + }, + { + "epoch": 0.5, + "learning_rate": 2.0688003158213172e-05, + "loss": 1.3197, + "step": 2398 + }, + { + "epoch": 0.5, + "learning_rate": 2.067438455122255e-05, + "loss": 1.2978, + "step": 2399 + }, + { + "epoch": 0.5, + "learning_rate": 2.0660765631179474e-05, + "loss": 1.2411, + "step": 2400 + }, + { + "epoch": 0.51, + "learning_rate": 2.0647146404405923e-05, + "loss": 1.2427, + "step": 2401 + }, + { + "epoch": 0.51, + "learning_rate": 2.0633526877224006e-05, + "loss": 1.2709, + "step": 2402 + }, + { + "epoch": 0.51, + "learning_rate": 2.061990705595597e-05, + "loss": 1.2201, + "step": 2403 + }, + { + "epoch": 0.51, + "learning_rate": 2.060628694692419e-05, + "loss": 1.2977, + "step": 2404 + }, + { + "epoch": 0.51, + "learning_rate": 2.0592666556451197e-05, + "loss": 1.2939, + "step": 2405 + }, + { + "epoch": 0.51, + "learning_rate": 2.0579045890859635e-05, + "loss": 1.2681, + "step": 2406 + }, + { + "epoch": 0.51, + "learning_rate": 2.0565424956472278e-05, + "loss": 1.3335, + "step": 2407 + }, + { + "epoch": 0.51, + "learning_rate": 2.055180375961203e-05, + "loss": 1.2893, + "step": 2408 + }, + { + "epoch": 0.51, + "learning_rate": 2.053818230660191e-05, + "loss": 1.2555, + "step": 2409 + }, + { + "epoch": 0.51, + "learning_rate": 2.052456060376506e-05, + "loss": 1.3004, + "step": 2410 + }, + { + "epoch": 0.51, + "learning_rate": 2.051093865742474e-05, + "loss": 1.2388, + "step": 2411 + }, + { + "epoch": 0.51, + "learning_rate": 2.0497316473904324e-05, + "loss": 1.2878, + "step": 2412 + }, + { + "epoch": 0.51, + "learning_rate": 2.048369405952729e-05, + "loss": 1.2574, + "step": 2413 + }, + { + "epoch": 0.51, + "learning_rate": 2.0470071420617222e-05, + "loss": 1.2572, + "step": 2414 + }, + { + "epoch": 0.51, + "learning_rate": 2.045644856349782e-05, + "loss": 1.2595, + "step": 2415 + }, + { + "epoch": 0.51, + "learning_rate": 2.0442825494492876e-05, + "loss": 1.2491, + "step": 2416 + }, + { + "epoch": 0.51, + "learning_rate": 2.0429202219926273e-05, + "loss": 1.2555, + "step": 2417 + }, + { + "epoch": 0.51, + "learning_rate": 2.0415578746122007e-05, + "loss": 1.3248, + "step": 2418 + }, + { + "epoch": 0.51, + "learning_rate": 2.0401955079404154e-05, + "loss": 1.2744, + "step": 2419 + }, + { + "epoch": 0.51, + "learning_rate": 2.0388331226096886e-05, + "loss": 1.2988, + "step": 2420 + }, + { + "epoch": 0.51, + "learning_rate": 2.0374707192524455e-05, + "loss": 1.2343, + "step": 2421 + }, + { + "epoch": 0.51, + "learning_rate": 2.036108298501121e-05, + "loss": 1.3385, + "step": 2422 + }, + { + "epoch": 0.51, + "learning_rate": 2.034745860988156e-05, + "loss": 1.2002, + "step": 2423 + }, + { + "epoch": 0.51, + "learning_rate": 2.0333834073460018e-05, + "loss": 1.2588, + "step": 2424 + }, + { + "epoch": 0.51, + "learning_rate": 2.032020938207114e-05, + "loss": 1.2447, + "step": 2425 + }, + { + "epoch": 0.51, + "learning_rate": 2.030658454203958e-05, + "loss": 1.2394, + "step": 2426 + }, + { + "epoch": 0.51, + "learning_rate": 2.029295955969005e-05, + "loss": 1.2984, + "step": 2427 + }, + { + "epoch": 0.51, + "learning_rate": 2.027933444134733e-05, + "loss": 1.2739, + "step": 2428 + }, + { + "epoch": 0.51, + "learning_rate": 2.0265709193336266e-05, + "loss": 1.3139, + "step": 2429 + }, + { + "epoch": 0.51, + "learning_rate": 2.025208382198176e-05, + "loss": 1.2836, + "step": 2430 + }, + { + "epoch": 0.51, + "learning_rate": 2.0238458333608766e-05, + "loss": 1.3205, + "step": 2431 + }, + { + "epoch": 0.51, + "learning_rate": 2.0224832734542314e-05, + "loss": 1.225, + "step": 2432 + }, + { + "epoch": 0.51, + "learning_rate": 2.0211207031107457e-05, + "loss": 1.2714, + "step": 2433 + }, + { + "epoch": 0.51, + "learning_rate": 2.0197581229629317e-05, + "loss": 1.2509, + "step": 2434 + }, + { + "epoch": 0.51, + "learning_rate": 2.018395533643305e-05, + "loss": 1.2631, + "step": 2435 + }, + { + "epoch": 0.51, + "learning_rate": 2.017032935784386e-05, + "loss": 1.2623, + "step": 2436 + }, + { + "epoch": 0.51, + "learning_rate": 2.0156703300186997e-05, + "loss": 1.2872, + "step": 2437 + }, + { + "epoch": 0.51, + "learning_rate": 2.0143077169787725e-05, + "loss": 1.2804, + "step": 2438 + }, + { + "epoch": 0.51, + "learning_rate": 2.012945097297137e-05, + "loss": 1.2716, + "step": 2439 + }, + { + "epoch": 0.51, + "learning_rate": 2.0115824716063273e-05, + "loss": 1.2535, + "step": 2440 + }, + { + "epoch": 0.51, + "learning_rate": 2.0102198405388806e-05, + "loss": 1.2782, + "step": 2441 + }, + { + "epoch": 0.51, + "learning_rate": 2.008857204727336e-05, + "loss": 1.2146, + "step": 2442 + }, + { + "epoch": 0.51, + "learning_rate": 2.0074945648042353e-05, + "loss": 1.2816, + "step": 2443 + }, + { + "epoch": 0.51, + "learning_rate": 2.0061319214021237e-05, + "loss": 1.2117, + "step": 2444 + }, + { + "epoch": 0.51, + "learning_rate": 2.0047692751535454e-05, + "loss": 1.2778, + "step": 2445 + }, + { + "epoch": 0.51, + "learning_rate": 2.0034066266910475e-05, + "loss": 1.2435, + "step": 2446 + }, + { + "epoch": 0.51, + "learning_rate": 2.0020439766471775e-05, + "loss": 1.2687, + "step": 2447 + }, + { + "epoch": 0.51, + "learning_rate": 2.000681325654484e-05, + "loss": 1.2329, + "step": 2448 + }, + { + "epoch": 0.52, + "learning_rate": 1.999318674345516e-05, + "loss": 1.2704, + "step": 2449 + }, + { + "epoch": 0.52, + "learning_rate": 1.997956023352823e-05, + "loss": 1.22, + "step": 2450 + }, + { + "epoch": 0.52, + "learning_rate": 1.9965933733089535e-05, + "loss": 1.2474, + "step": 2451 + }, + { + "epoch": 0.52, + "learning_rate": 1.995230724846455e-05, + "loss": 1.1917, + "step": 2452 + }, + { + "epoch": 0.52, + "learning_rate": 1.993868078597877e-05, + "loss": 1.2962, + "step": 2453 + }, + { + "epoch": 0.52, + "learning_rate": 1.9925054351957647e-05, + "loss": 1.289, + "step": 2454 + }, + { + "epoch": 0.52, + "learning_rate": 1.9911427952726644e-05, + "loss": 1.2758, + "step": 2455 + }, + { + "epoch": 0.52, + "learning_rate": 1.9897801594611204e-05, + "loss": 1.211, + "step": 2456 + }, + { + "epoch": 0.52, + "learning_rate": 1.988417528393673e-05, + "loss": 1.2334, + "step": 2457 + }, + { + "epoch": 0.52, + "learning_rate": 1.9870549027028635e-05, + "loss": 1.2707, + "step": 2458 + }, + { + "epoch": 0.52, + "learning_rate": 1.9856922830212286e-05, + "loss": 1.2852, + "step": 2459 + }, + { + "epoch": 0.52, + "learning_rate": 1.984329669981301e-05, + "loss": 1.2086, + "step": 2460 + }, + { + "epoch": 0.52, + "learning_rate": 1.9829670642156147e-05, + "loss": 1.2645, + "step": 2461 + }, + { + "epoch": 0.52, + "learning_rate": 1.981604466356695e-05, + "loss": 1.2752, + "step": 2462 + }, + { + "epoch": 0.52, + "learning_rate": 1.980241877037069e-05, + "loss": 1.1866, + "step": 2463 + }, + { + "epoch": 0.52, + "learning_rate": 1.9788792968892553e-05, + "loss": 1.262, + "step": 2464 + }, + { + "epoch": 0.52, + "learning_rate": 1.977516726545769e-05, + "loss": 1.3017, + "step": 2465 + }, + { + "epoch": 0.52, + "learning_rate": 1.976154166639124e-05, + "loss": 1.2054, + "step": 2466 + }, + { + "epoch": 0.52, + "learning_rate": 1.9747916178018246e-05, + "loss": 1.2443, + "step": 2467 + }, + { + "epoch": 0.52, + "learning_rate": 1.9734290806663738e-05, + "loss": 1.2655, + "step": 2468 + }, + { + "epoch": 0.52, + "learning_rate": 1.9720665558652676e-05, + "loss": 1.2588, + "step": 2469 + }, + { + "epoch": 0.52, + "learning_rate": 1.970704044030995e-05, + "loss": 1.281, + "step": 2470 + }, + { + "epoch": 0.52, + "learning_rate": 1.9693415457960426e-05, + "loss": 1.2657, + "step": 2471 + }, + { + "epoch": 0.52, + "learning_rate": 1.9679790617928872e-05, + "loss": 1.2382, + "step": 2472 + }, + { + "epoch": 0.52, + "learning_rate": 1.966616592653999e-05, + "loss": 1.2571, + "step": 2473 + }, + { + "epoch": 0.52, + "learning_rate": 1.9652541390118443e-05, + "loss": 1.2527, + "step": 2474 + }, + { + "epoch": 0.52, + "learning_rate": 1.963891701498879e-05, + "loss": 1.2489, + "step": 2475 + }, + { + "epoch": 0.52, + "learning_rate": 1.9625292807475548e-05, + "loss": 1.2523, + "step": 2476 + }, + { + "epoch": 0.52, + "learning_rate": 1.9611668773903124e-05, + "loss": 1.2716, + "step": 2477 + }, + { + "epoch": 0.52, + "learning_rate": 1.9598044920595853e-05, + "loss": 1.203, + "step": 2478 + }, + { + "epoch": 0.52, + "learning_rate": 1.9584421253878e-05, + "loss": 1.3159, + "step": 2479 + }, + { + "epoch": 0.52, + "learning_rate": 1.9570797780073737e-05, + "loss": 1.2792, + "step": 2480 + }, + { + "epoch": 0.52, + "learning_rate": 1.955717450550713e-05, + "loss": 1.2965, + "step": 2481 + }, + { + "epoch": 0.52, + "learning_rate": 1.9543551436502186e-05, + "loss": 1.2812, + "step": 2482 + }, + { + "epoch": 0.52, + "learning_rate": 1.9529928579382778e-05, + "loss": 1.2525, + "step": 2483 + }, + { + "epoch": 0.52, + "learning_rate": 1.9516305940472714e-05, + "loss": 1.2945, + "step": 2484 + }, + { + "epoch": 0.52, + "learning_rate": 1.9502683526095683e-05, + "loss": 1.3148, + "step": 2485 + }, + { + "epoch": 0.52, + "learning_rate": 1.948906134257526e-05, + "loss": 1.2594, + "step": 2486 + }, + { + "epoch": 0.52, + "learning_rate": 1.947543939623495e-05, + "loss": 1.2805, + "step": 2487 + }, + { + "epoch": 0.52, + "learning_rate": 1.9461817693398105e-05, + "loss": 1.3181, + "step": 2488 + }, + { + "epoch": 0.52, + "learning_rate": 1.944819624038798e-05, + "loss": 1.2239, + "step": 2489 + }, + { + "epoch": 0.52, + "learning_rate": 1.943457504352773e-05, + "loss": 1.2322, + "step": 2490 + }, + { + "epoch": 0.52, + "learning_rate": 1.942095410914037e-05, + "loss": 1.2296, + "step": 2491 + }, + { + "epoch": 0.52, + "learning_rate": 1.9407333443548806e-05, + "loss": 1.326, + "step": 2492 + }, + { + "epoch": 0.52, + "learning_rate": 1.9393713053075816e-05, + "loss": 1.2577, + "step": 2493 + }, + { + "epoch": 0.52, + "learning_rate": 1.9380092944044036e-05, + "loss": 1.2903, + "step": 2494 + }, + { + "epoch": 0.52, + "learning_rate": 1.9366473122776e-05, + "loss": 1.2448, + "step": 2495 + }, + { + "epoch": 0.53, + "learning_rate": 1.9352853595594077e-05, + "loss": 1.2491, + "step": 2496 + }, + { + "epoch": 0.53, + "learning_rate": 1.9339234368820533e-05, + "loss": 1.279, + "step": 2497 + }, + { + "epoch": 0.53, + "learning_rate": 1.932561544877746e-05, + "loss": 1.2312, + "step": 2498 + }, + { + "epoch": 0.53, + "learning_rate": 1.9311996841786825e-05, + "loss": 1.265, + "step": 2499 + }, + { + "epoch": 0.53, + "learning_rate": 1.9298378554170463e-05, + "loss": 1.217, + "step": 2500 + }, + { + "epoch": 0.53, + "learning_rate": 1.9284760592250037e-05, + "loss": 1.2786, + "step": 2501 + }, + { + "epoch": 0.53, + "learning_rate": 1.9271142962347058e-05, + "loss": 1.273, + "step": 2502 + }, + { + "epoch": 0.53, + "learning_rate": 1.925752567078291e-05, + "loss": 1.2468, + "step": 2503 + }, + { + "epoch": 0.53, + "learning_rate": 1.924390872387879e-05, + "loss": 1.3012, + "step": 2504 + }, + { + "epoch": 0.53, + "learning_rate": 1.923029212795576e-05, + "loss": 1.2393, + "step": 2505 + }, + { + "epoch": 0.53, + "learning_rate": 1.9216675889334704e-05, + "loss": 1.3048, + "step": 2506 + }, + { + "epoch": 0.53, + "learning_rate": 1.9203060014336334e-05, + "loss": 1.2441, + "step": 2507 + }, + { + "epoch": 0.53, + "learning_rate": 1.9189444509281216e-05, + "loss": 1.2865, + "step": 2508 + }, + { + "epoch": 0.53, + "learning_rate": 1.9175829380489727e-05, + "loss": 1.2377, + "step": 2509 + }, + { + "epoch": 0.53, + "learning_rate": 1.9162214634282055e-05, + "loss": 1.2666, + "step": 2510 + }, + { + "epoch": 0.53, + "learning_rate": 1.9148600276978254e-05, + "loss": 1.2533, + "step": 2511 + }, + { + "epoch": 0.53, + "learning_rate": 1.9134986314898156e-05, + "loss": 1.2998, + "step": 2512 + }, + { + "epoch": 0.53, + "learning_rate": 1.9121372754361437e-05, + "loss": 1.2494, + "step": 2513 + }, + { + "epoch": 0.53, + "learning_rate": 1.9107759601687562e-05, + "loss": 1.2354, + "step": 2514 + }, + { + "epoch": 0.53, + "learning_rate": 1.909414686319583e-05, + "loss": 1.2535, + "step": 2515 + }, + { + "epoch": 0.53, + "learning_rate": 1.9080534545205334e-05, + "loss": 1.2787, + "step": 2516 + }, + { + "epoch": 0.53, + "learning_rate": 1.9066922654034975e-05, + "loss": 1.2648, + "step": 2517 + }, + { + "epoch": 0.53, + "learning_rate": 1.9053311196003457e-05, + "loss": 1.3116, + "step": 2518 + }, + { + "epoch": 0.53, + "learning_rate": 1.9039700177429282e-05, + "loss": 1.2336, + "step": 2519 + }, + { + "epoch": 0.53, + "learning_rate": 1.9026089604630743e-05, + "loss": 1.307, + "step": 2520 + }, + { + "epoch": 0.53, + "learning_rate": 1.9012479483925942e-05, + "loss": 1.2066, + "step": 2521 + }, + { + "epoch": 0.53, + "learning_rate": 1.8998869821632757e-05, + "loss": 1.2977, + "step": 2522 + }, + { + "epoch": 0.53, + "learning_rate": 1.898526062406884e-05, + "loss": 1.2949, + "step": 2523 + }, + { + "epoch": 0.53, + "learning_rate": 1.8971651897551672e-05, + "loss": 1.2905, + "step": 2524 + }, + { + "epoch": 0.53, + "learning_rate": 1.8958043648398457e-05, + "loss": 1.2528, + "step": 2525 + }, + { + "epoch": 0.53, + "learning_rate": 1.8944435882926236e-05, + "loss": 1.2868, + "step": 2526 + }, + { + "epoch": 0.53, + "learning_rate": 1.893082860745178e-05, + "loss": 1.1937, + "step": 2527 + }, + { + "epoch": 0.53, + "learning_rate": 1.8917221828291652e-05, + "loss": 1.2792, + "step": 2528 + }, + { + "epoch": 0.53, + "learning_rate": 1.8903615551762182e-05, + "loss": 1.2896, + "step": 2529 + }, + { + "epoch": 0.53, + "learning_rate": 1.8890009784179476e-05, + "loss": 1.2784, + "step": 2530 + }, + { + "epoch": 0.53, + "learning_rate": 1.8876404531859376e-05, + "loss": 1.3013, + "step": 2531 + }, + { + "epoch": 0.53, + "learning_rate": 1.8862799801117523e-05, + "loss": 1.2834, + "step": 2532 + }, + { + "epoch": 0.53, + "learning_rate": 1.884919559826928e-05, + "loss": 1.2875, + "step": 2533 + }, + { + "epoch": 0.53, + "learning_rate": 1.8835591929629795e-05, + "loss": 1.2709, + "step": 2534 + }, + { + "epoch": 0.53, + "learning_rate": 1.882198880151395e-05, + "loss": 1.227, + "step": 2535 + }, + { + "epoch": 0.53, + "learning_rate": 1.8808386220236365e-05, + "loss": 1.2695, + "step": 2536 + }, + { + "epoch": 0.53, + "learning_rate": 1.8794784192111448e-05, + "loss": 1.2871, + "step": 2537 + }, + { + "epoch": 0.53, + "learning_rate": 1.8781182723453303e-05, + "loss": 1.2799, + "step": 2538 + }, + { + "epoch": 0.53, + "learning_rate": 1.8767581820575803e-05, + "loss": 1.2406, + "step": 2539 + }, + { + "epoch": 0.53, + "learning_rate": 1.875398148979255e-05, + "loss": 1.2502, + "step": 2540 + }, + { + "epoch": 0.53, + "learning_rate": 1.874038173741688e-05, + "loss": 1.2613, + "step": 2541 + }, + { + "epoch": 0.53, + "learning_rate": 1.8726782569761864e-05, + "loss": 1.2224, + "step": 2542 + }, + { + "epoch": 0.53, + "learning_rate": 1.87131839931403e-05, + "loss": 1.2702, + "step": 2543 + }, + { + "epoch": 0.54, + "learning_rate": 1.8699586013864694e-05, + "loss": 1.2646, + "step": 2544 + }, + { + "epoch": 0.54, + "learning_rate": 1.8685988638247316e-05, + "loss": 1.2868, + "step": 2545 + }, + { + "epoch": 0.54, + "learning_rate": 1.8672391872600108e-05, + "loss": 1.1826, + "step": 2546 + }, + { + "epoch": 0.54, + "learning_rate": 1.8658795723234774e-05, + "loss": 1.2546, + "step": 2547 + }, + { + "epoch": 0.54, + "learning_rate": 1.8645200196462698e-05, + "loss": 1.2052, + "step": 2548 + }, + { + "epoch": 0.54, + "learning_rate": 1.8631605298594977e-05, + "loss": 1.2719, + "step": 2549 + }, + { + "epoch": 0.54, + "learning_rate": 1.8618011035942444e-05, + "loss": 1.2677, + "step": 2550 + }, + { + "epoch": 0.54, + "learning_rate": 1.860441741481561e-05, + "loss": 1.2691, + "step": 2551 + }, + { + "epoch": 0.54, + "learning_rate": 1.8590824441524696e-05, + "loss": 1.2376, + "step": 2552 + }, + { + "epoch": 0.54, + "learning_rate": 1.8577232122379625e-05, + "loss": 1.2478, + "step": 2553 + }, + { + "epoch": 0.54, + "learning_rate": 1.8563640463690015e-05, + "loss": 1.2427, + "step": 2554 + }, + { + "epoch": 0.54, + "learning_rate": 1.8550049471765176e-05, + "loss": 1.2358, + "step": 2555 + }, + { + "epoch": 0.54, + "learning_rate": 1.853645915291412e-05, + "loss": 1.3074, + "step": 2556 + }, + { + "epoch": 0.54, + "learning_rate": 1.8522869513445515e-05, + "loss": 1.3063, + "step": 2557 + }, + { + "epoch": 0.54, + "learning_rate": 1.850928055966775e-05, + "loss": 1.2549, + "step": 2558 + }, + { + "epoch": 0.54, + "learning_rate": 1.8495692297888885e-05, + "loss": 1.267, + "step": 2559 + }, + { + "epoch": 0.54, + "learning_rate": 1.848210473441664e-05, + "loss": 1.2571, + "step": 2560 + }, + { + "epoch": 0.54, + "learning_rate": 1.8468517875558433e-05, + "loss": 1.2976, + "step": 2561 + }, + { + "epoch": 0.54, + "learning_rate": 1.845493172762134e-05, + "loss": 1.3059, + "step": 2562 + }, + { + "epoch": 0.54, + "learning_rate": 1.8441346296912128e-05, + "loss": 1.2603, + "step": 2563 + }, + { + "epoch": 0.54, + "learning_rate": 1.8427761589737203e-05, + "loss": 1.2504, + "step": 2564 + }, + { + "epoch": 0.54, + "learning_rate": 1.8414177612402657e-05, + "loss": 1.3052, + "step": 2565 + }, + { + "epoch": 0.54, + "learning_rate": 1.8400594371214234e-05, + "loss": 1.2742, + "step": 2566 + }, + { + "epoch": 0.54, + "learning_rate": 1.8387011872477338e-05, + "loss": 1.2572, + "step": 2567 + }, + { + "epoch": 0.54, + "learning_rate": 1.8373430122497022e-05, + "loss": 1.2841, + "step": 2568 + }, + { + "epoch": 0.54, + "learning_rate": 1.835984912757801e-05, + "loss": 1.2668, + "step": 2569 + }, + { + "epoch": 0.54, + "learning_rate": 1.8346268894024644e-05, + "loss": 1.2975, + "step": 2570 + }, + { + "epoch": 0.54, + "learning_rate": 1.8332689428140956e-05, + "loss": 1.2527, + "step": 2571 + }, + { + "epoch": 0.54, + "learning_rate": 1.831911073623058e-05, + "loss": 1.2708, + "step": 2572 + }, + { + "epoch": 0.54, + "learning_rate": 1.830553282459681e-05, + "loss": 1.2842, + "step": 2573 + }, + { + "epoch": 0.54, + "learning_rate": 1.8291955699542584e-05, + "loss": 1.2696, + "step": 2574 + }, + { + "epoch": 0.54, + "learning_rate": 1.8278379367370448e-05, + "loss": 1.2579, + "step": 2575 + }, + { + "epoch": 0.54, + "learning_rate": 1.8264803834382622e-05, + "loss": 1.284, + "step": 2576 + }, + { + "epoch": 0.54, + "learning_rate": 1.8251229106880916e-05, + "loss": 1.2991, + "step": 2577 + }, + { + "epoch": 0.54, + "learning_rate": 1.8237655191166785e-05, + "loss": 1.289, + "step": 2578 + }, + { + "epoch": 0.54, + "learning_rate": 1.8224082093541306e-05, + "loss": 1.2414, + "step": 2579 + }, + { + "epoch": 0.54, + "learning_rate": 1.8210509820305174e-05, + "loss": 1.2379, + "step": 2580 + }, + { + "epoch": 0.54, + "learning_rate": 1.8196938377758696e-05, + "loss": 1.2553, + "step": 2581 + }, + { + "epoch": 0.54, + "learning_rate": 1.818336777220181e-05, + "loss": 1.2305, + "step": 2582 + }, + { + "epoch": 0.54, + "learning_rate": 1.8169798009934038e-05, + "loss": 1.2905, + "step": 2583 + }, + { + "epoch": 0.54, + "learning_rate": 1.815622909725454e-05, + "loss": 1.3296, + "step": 2584 + }, + { + "epoch": 0.54, + "learning_rate": 1.8142661040462068e-05, + "loss": 1.265, + "step": 2585 + }, + { + "epoch": 0.54, + "learning_rate": 1.8129093845854965e-05, + "loss": 1.2608, + "step": 2586 + }, + { + "epoch": 0.54, + "learning_rate": 1.81155275197312e-05, + "loss": 1.2616, + "step": 2587 + }, + { + "epoch": 0.54, + "learning_rate": 1.8101962068388315e-05, + "loss": 1.2136, + "step": 2588 + }, + { + "epoch": 0.54, + "learning_rate": 1.808839749812346e-05, + "loss": 1.2556, + "step": 2589 + }, + { + "epoch": 0.54, + "learning_rate": 1.807483381523337e-05, + "loss": 1.27, + "step": 2590 + }, + { + "epoch": 0.54, + "learning_rate": 1.8061271026014364e-05, + "loss": 1.2541, + "step": 2591 + }, + { + "epoch": 0.55, + "learning_rate": 1.8047709136762368e-05, + "loss": 1.2369, + "step": 2592 + }, + { + "epoch": 0.55, + "learning_rate": 1.8034148153772864e-05, + "loss": 1.2211, + "step": 2593 + }, + { + "epoch": 0.55, + "learning_rate": 1.8020588083340912e-05, + "loss": 1.3083, + "step": 2594 + }, + { + "epoch": 0.55, + "learning_rate": 1.8007028931761184e-05, + "loss": 1.2896, + "step": 2595 + }, + { + "epoch": 0.55, + "learning_rate": 1.7993470705327877e-05, + "loss": 1.2885, + "step": 2596 + }, + { + "epoch": 0.55, + "learning_rate": 1.79799134103348e-05, + "loss": 1.2292, + "step": 2597 + }, + { + "epoch": 0.55, + "learning_rate": 1.7966357053075312e-05, + "loss": 1.299, + "step": 2598 + }, + { + "epoch": 0.55, + "learning_rate": 1.795280163984232e-05, + "loss": 1.2675, + "step": 2599 + }, + { + "epoch": 0.55, + "learning_rate": 1.7939247176928328e-05, + "loss": 1.2745, + "step": 2600 + }, + { + "epoch": 0.55, + "learning_rate": 1.792569367062537e-05, + "loss": 1.2909, + "step": 2601 + }, + { + "epoch": 0.55, + "learning_rate": 1.791214112722505e-05, + "loss": 1.2501, + "step": 2602 + }, + { + "epoch": 0.55, + "learning_rate": 1.7898589553018523e-05, + "loss": 1.2755, + "step": 2603 + }, + { + "epoch": 0.55, + "learning_rate": 1.788503895429649e-05, + "loss": 1.2249, + "step": 2604 + }, + { + "epoch": 0.55, + "learning_rate": 1.7871489337349208e-05, + "loss": 1.2698, + "step": 2605 + }, + { + "epoch": 0.55, + "learning_rate": 1.785794070846647e-05, + "loss": 1.238, + "step": 2606 + }, + { + "epoch": 0.55, + "learning_rate": 1.78443930739376e-05, + "loss": 1.287, + "step": 2607 + }, + { + "epoch": 0.55, + "learning_rate": 1.7830846440051493e-05, + "loss": 1.2503, + "step": 2608 + }, + { + "epoch": 0.55, + "learning_rate": 1.7817300813096548e-05, + "loss": 1.2666, + "step": 2609 + }, + { + "epoch": 0.55, + "learning_rate": 1.7803756199360704e-05, + "loss": 1.2553, + "step": 2610 + }, + { + "epoch": 0.55, + "learning_rate": 1.7790212605131448e-05, + "loss": 1.2677, + "step": 2611 + }, + { + "epoch": 0.55, + "learning_rate": 1.7776670036695758e-05, + "loss": 1.3103, + "step": 2612 + }, + { + "epoch": 0.55, + "learning_rate": 1.776312850034018e-05, + "loss": 1.2804, + "step": 2613 + }, + { + "epoch": 0.55, + "learning_rate": 1.7749588002350748e-05, + "loss": 1.2936, + "step": 2614 + }, + { + "epoch": 0.55, + "learning_rate": 1.7736048549013013e-05, + "loss": 1.2828, + "step": 2615 + }, + { + "epoch": 0.55, + "learning_rate": 1.7722510146612075e-05, + "loss": 1.1981, + "step": 2616 + }, + { + "epoch": 0.55, + "learning_rate": 1.770897280143251e-05, + "loss": 1.218, + "step": 2617 + }, + { + "epoch": 0.55, + "learning_rate": 1.7695436519758412e-05, + "loss": 1.2663, + "step": 2618 + }, + { + "epoch": 0.55, + "learning_rate": 1.76819013078734e-05, + "loss": 1.2229, + "step": 2619 + }, + { + "epoch": 0.55, + "learning_rate": 1.7668367172060562e-05, + "loss": 1.284, + "step": 2620 + }, + { + "epoch": 0.55, + "learning_rate": 1.765483411860253e-05, + "loss": 1.308, + "step": 2621 + }, + { + "epoch": 0.55, + "learning_rate": 1.7641302153781402e-05, + "loss": 1.2825, + "step": 2622 + }, + { + "epoch": 0.55, + "learning_rate": 1.7627771283878764e-05, + "loss": 1.2576, + "step": 2623 + }, + { + "epoch": 0.55, + "learning_rate": 1.761424151517573e-05, + "loss": 1.3091, + "step": 2624 + }, + { + "epoch": 0.55, + "learning_rate": 1.7600712853952863e-05, + "loss": 1.2467, + "step": 2625 + }, + { + "epoch": 0.55, + "learning_rate": 1.7587185306490245e-05, + "loss": 1.2428, + "step": 2626 + }, + { + "epoch": 0.55, + "learning_rate": 1.7573658879067424e-05, + "loss": 1.2758, + "step": 2627 + }, + { + "epoch": 0.55, + "learning_rate": 1.7560133577963423e-05, + "loss": 1.2585, + "step": 2628 + }, + { + "epoch": 0.55, + "learning_rate": 1.754660940945676e-05, + "loss": 1.2334, + "step": 2629 + }, + { + "epoch": 0.55, + "learning_rate": 1.753308637982541e-05, + "loss": 1.222, + "step": 2630 + }, + { + "epoch": 0.55, + "learning_rate": 1.751956449534682e-05, + "loss": 1.2623, + "step": 2631 + }, + { + "epoch": 0.55, + "learning_rate": 1.7506043762297932e-05, + "loss": 1.32, + "step": 2632 + }, + { + "epoch": 0.55, + "learning_rate": 1.7492524186955108e-05, + "loss": 1.2514, + "step": 2633 + }, + { + "epoch": 0.55, + "learning_rate": 1.7479005775594216e-05, + "loss": 1.3031, + "step": 2634 + }, + { + "epoch": 0.55, + "learning_rate": 1.746548853449056e-05, + "loss": 1.2556, + "step": 2635 + }, + { + "epoch": 0.55, + "learning_rate": 1.74519724699189e-05, + "loss": 1.2561, + "step": 2636 + }, + { + "epoch": 0.55, + "learning_rate": 1.7438457588153466e-05, + "loss": 1.1857, + "step": 2637 + }, + { + "epoch": 0.55, + "learning_rate": 1.742494389546792e-05, + "loss": 1.2086, + "step": 2638 + }, + { + "epoch": 0.56, + "learning_rate": 1.7411431398135384e-05, + "loss": 1.2687, + "step": 2639 + }, + { + "epoch": 0.56, + "learning_rate": 1.739792010242843e-05, + "loss": 1.2376, + "step": 2640 + }, + { + "epoch": 0.56, + "learning_rate": 1.738441001461905e-05, + "loss": 1.3122, + "step": 2641 + }, + { + "epoch": 0.56, + "learning_rate": 1.7370901140978706e-05, + "loss": 1.2921, + "step": 2642 + }, + { + "epoch": 0.56, + "learning_rate": 1.735739348777827e-05, + "loss": 1.2603, + "step": 2643 + }, + { + "epoch": 0.56, + "learning_rate": 1.734388706128805e-05, + "loss": 1.319, + "step": 2644 + }, + { + "epoch": 0.56, + "learning_rate": 1.7330381867777808e-05, + "loss": 1.2287, + "step": 2645 + }, + { + "epoch": 0.56, + "learning_rate": 1.731687791351671e-05, + "loss": 1.2493, + "step": 2646 + }, + { + "epoch": 0.56, + "learning_rate": 1.730337520477335e-05, + "loss": 1.2435, + "step": 2647 + }, + { + "epoch": 0.56, + "learning_rate": 1.7289873747815755e-05, + "loss": 1.2438, + "step": 2648 + }, + { + "epoch": 0.56, + "learning_rate": 1.7276373548911355e-05, + "loss": 1.2156, + "step": 2649 + }, + { + "epoch": 0.56, + "learning_rate": 1.7262874614327016e-05, + "loss": 1.2229, + "step": 2650 + }, + { + "epoch": 0.56, + "learning_rate": 1.7249376950329004e-05, + "loss": 1.22, + "step": 2651 + }, + { + "epoch": 0.56, + "learning_rate": 1.7235880563182988e-05, + "loss": 1.1856, + "step": 2652 + }, + { + "epoch": 0.56, + "learning_rate": 1.7222385459154072e-05, + "loss": 1.3036, + "step": 2653 + }, + { + "epoch": 0.56, + "learning_rate": 1.720889164450672e-05, + "loss": 1.2084, + "step": 2654 + }, + { + "epoch": 0.56, + "learning_rate": 1.7195399125504853e-05, + "loss": 1.2874, + "step": 2655 + }, + { + "epoch": 0.56, + "learning_rate": 1.7181907908411744e-05, + "loss": 1.2356, + "step": 2656 + }, + { + "epoch": 0.56, + "learning_rate": 1.716841799949007e-05, + "loss": 1.2575, + "step": 2657 + }, + { + "epoch": 0.56, + "learning_rate": 1.7154929405001936e-05, + "loss": 1.2283, + "step": 2658 + }, + { + "epoch": 0.56, + "learning_rate": 1.7141442131208788e-05, + "loss": 1.2843, + "step": 2659 + }, + { + "epoch": 0.56, + "learning_rate": 1.712795618437148e-05, + "loss": 1.2767, + "step": 2660 + }, + { + "epoch": 0.56, + "learning_rate": 1.7114471570750266e-05, + "loss": 1.2471, + "step": 2661 + }, + { + "epoch": 0.56, + "learning_rate": 1.7100988296604756e-05, + "loss": 1.2411, + "step": 2662 + }, + { + "epoch": 0.56, + "learning_rate": 1.708750636819395e-05, + "loss": 1.2241, + "step": 2663 + }, + { + "epoch": 0.56, + "learning_rate": 1.7074025791776232e-05, + "loss": 1.2435, + "step": 2664 + }, + { + "epoch": 0.56, + "learning_rate": 1.706054657360933e-05, + "loss": 1.2949, + "step": 2665 + }, + { + "epoch": 0.56, + "learning_rate": 1.704706871995038e-05, + "loss": 1.2506, + "step": 2666 + }, + { + "epoch": 0.56, + "learning_rate": 1.703359223705585e-05, + "loss": 1.2397, + "step": 2667 + }, + { + "epoch": 0.56, + "learning_rate": 1.7020117131181585e-05, + "loss": 1.3231, + "step": 2668 + }, + { + "epoch": 0.56, + "learning_rate": 1.70066434085828e-05, + "loss": 1.2777, + "step": 2669 + }, + { + "epoch": 0.56, + "learning_rate": 1.6993171075514054e-05, + "loss": 1.242, + "step": 2670 + }, + { + "epoch": 0.56, + "learning_rate": 1.697970013822927e-05, + "loss": 1.2717, + "step": 2671 + }, + { + "epoch": 0.56, + "learning_rate": 1.6966230602981727e-05, + "loss": 1.2544, + "step": 2672 + }, + { + "epoch": 0.56, + "learning_rate": 1.6952762476024023e-05, + "loss": 1.2066, + "step": 2673 + }, + { + "epoch": 0.56, + "learning_rate": 1.6939295763608146e-05, + "loss": 1.1992, + "step": 2674 + }, + { + "epoch": 0.56, + "learning_rate": 1.6925830471985398e-05, + "loss": 1.2805, + "step": 2675 + }, + { + "epoch": 0.56, + "learning_rate": 1.6912366607406433e-05, + "loss": 1.2913, + "step": 2676 + }, + { + "epoch": 0.56, + "learning_rate": 1.6898904176121246e-05, + "loss": 1.234, + "step": 2677 + }, + { + "epoch": 0.56, + "learning_rate": 1.688544318437914e-05, + "loss": 1.309, + "step": 2678 + }, + { + "epoch": 0.56, + "learning_rate": 1.6871983638428794e-05, + "loss": 1.3139, + "step": 2679 + }, + { + "epoch": 0.56, + "learning_rate": 1.685852554451818e-05, + "loss": 1.2674, + "step": 2680 + }, + { + "epoch": 0.56, + "learning_rate": 1.6845068908894597e-05, + "loss": 1.2524, + "step": 2681 + }, + { + "epoch": 0.56, + "learning_rate": 1.68316137378047e-05, + "loss": 1.2966, + "step": 2682 + }, + { + "epoch": 0.56, + "learning_rate": 1.681816003749442e-05, + "loss": 1.2549, + "step": 2683 + }, + { + "epoch": 0.56, + "learning_rate": 1.6804707814209046e-05, + "loss": 1.2338, + "step": 2684 + }, + { + "epoch": 0.56, + "learning_rate": 1.6791257074193156e-05, + "loss": 1.304, + "step": 2685 + }, + { + "epoch": 0.56, + "learning_rate": 1.677780782369064e-05, + "loss": 1.274, + "step": 2686 + }, + { + "epoch": 0.57, + "learning_rate": 1.6764360068944706e-05, + "loss": 1.2277, + "step": 2687 + }, + { + "epoch": 0.57, + "learning_rate": 1.6750913816197873e-05, + "loss": 1.3203, + "step": 2688 + }, + { + "epoch": 0.57, + "learning_rate": 1.6737469071691936e-05, + "loss": 1.2951, + "step": 2689 + }, + { + "epoch": 0.57, + "learning_rate": 1.6724025841668026e-05, + "loss": 1.2688, + "step": 2690 + }, + { + "epoch": 0.57, + "learning_rate": 1.6710584132366542e-05, + "loss": 1.288, + "step": 2691 + }, + { + "epoch": 0.57, + "learning_rate": 1.6697143950027194e-05, + "loss": 1.2346, + "step": 2692 + }, + { + "epoch": 0.57, + "learning_rate": 1.6683705300888977e-05, + "loss": 1.2722, + "step": 2693 + }, + { + "epoch": 0.57, + "learning_rate": 1.667026819119016e-05, + "loss": 1.3016, + "step": 2694 + }, + { + "epoch": 0.57, + "learning_rate": 1.6656832627168338e-05, + "loss": 1.3038, + "step": 2695 + }, + { + "epoch": 0.57, + "learning_rate": 1.6643398615060346e-05, + "loss": 1.268, + "step": 2696 + }, + { + "epoch": 0.57, + "learning_rate": 1.6629966161102304e-05, + "loss": 1.2274, + "step": 2697 + }, + { + "epoch": 0.57, + "learning_rate": 1.661653527152964e-05, + "loss": 1.2902, + "step": 2698 + }, + { + "epoch": 0.57, + "learning_rate": 1.6603105952577024e-05, + "loss": 1.2847, + "step": 2699 + }, + { + "epoch": 0.57, + "learning_rate": 1.6589678210478415e-05, + "loss": 1.3047, + "step": 2700 + }, + { + "epoch": 0.57, + "learning_rate": 1.657625205146703e-05, + "loss": 1.2448, + "step": 2701 + }, + { + "epoch": 0.57, + "learning_rate": 1.6562827481775353e-05, + "loss": 1.285, + "step": 2702 + }, + { + "epoch": 0.57, + "learning_rate": 1.6549404507635135e-05, + "loss": 1.2517, + "step": 2703 + }, + { + "epoch": 0.57, + "learning_rate": 1.6535983135277378e-05, + "loss": 1.316, + "step": 2704 + }, + { + "epoch": 0.57, + "learning_rate": 1.6522563370932355e-05, + "loss": 1.2458, + "step": 2705 + }, + { + "epoch": 0.57, + "learning_rate": 1.6509145220829574e-05, + "loss": 1.2566, + "step": 2706 + }, + { + "epoch": 0.57, + "learning_rate": 1.64957286911978e-05, + "loss": 1.2483, + "step": 2707 + }, + { + "epoch": 0.57, + "learning_rate": 1.6482313788265058e-05, + "loss": 1.2282, + "step": 2708 + }, + { + "epoch": 0.57, + "learning_rate": 1.64689005182586e-05, + "loss": 1.2725, + "step": 2709 + }, + { + "epoch": 0.57, + "learning_rate": 1.6455488887404935e-05, + "loss": 1.2895, + "step": 2710 + }, + { + "epoch": 0.57, + "learning_rate": 1.6442078901929803e-05, + "loss": 1.2979, + "step": 2711 + }, + { + "epoch": 0.57, + "learning_rate": 1.6428670568058176e-05, + "loss": 1.2606, + "step": 2712 + }, + { + "epoch": 0.57, + "learning_rate": 1.641526389201427e-05, + "loss": 1.2355, + "step": 2713 + }, + { + "epoch": 0.57, + "learning_rate": 1.640185888002153e-05, + "loss": 1.285, + "step": 2714 + }, + { + "epoch": 0.57, + "learning_rate": 1.6388455538302612e-05, + "loss": 1.2582, + "step": 2715 + }, + { + "epoch": 0.57, + "learning_rate": 1.6375053873079424e-05, + "loss": 1.2936, + "step": 2716 + }, + { + "epoch": 0.57, + "learning_rate": 1.6361653890573078e-05, + "loss": 1.2875, + "step": 2717 + }, + { + "epoch": 0.57, + "learning_rate": 1.6348255597003896e-05, + "loss": 1.2882, + "step": 2718 + }, + { + "epoch": 0.57, + "learning_rate": 1.633485899859144e-05, + "loss": 1.2174, + "step": 2719 + }, + { + "epoch": 0.57, + "learning_rate": 1.632146410155447e-05, + "loss": 1.1721, + "step": 2720 + }, + { + "epoch": 0.57, + "learning_rate": 1.6308070912110965e-05, + "loss": 1.2582, + "step": 2721 + }, + { + "epoch": 0.57, + "learning_rate": 1.6294679436478095e-05, + "loss": 1.2698, + "step": 2722 + }, + { + "epoch": 0.57, + "learning_rate": 1.6281289680872252e-05, + "loss": 1.2157, + "step": 2723 + }, + { + "epoch": 0.57, + "learning_rate": 1.6267901651509022e-05, + "loss": 1.2678, + "step": 2724 + }, + { + "epoch": 0.57, + "learning_rate": 1.6254515354603194e-05, + "loss": 1.2412, + "step": 2725 + }, + { + "epoch": 0.57, + "learning_rate": 1.6241130796368737e-05, + "loss": 1.2696, + "step": 2726 + }, + { + "epoch": 0.57, + "learning_rate": 1.6227747983018845e-05, + "loss": 1.2756, + "step": 2727 + }, + { + "epoch": 0.57, + "learning_rate": 1.6214366920765856e-05, + "loss": 1.2865, + "step": 2728 + }, + { + "epoch": 0.57, + "learning_rate": 1.620098761582135e-05, + "loss": 1.2554, + "step": 2729 + }, + { + "epoch": 0.57, + "learning_rate": 1.6187610074396044e-05, + "loss": 1.2372, + "step": 2730 + }, + { + "epoch": 0.57, + "learning_rate": 1.6174234302699856e-05, + "loss": 1.291, + "step": 2731 + }, + { + "epoch": 0.57, + "learning_rate": 1.616086030694189e-05, + "loss": 1.2583, + "step": 2732 + }, + { + "epoch": 0.57, + "learning_rate": 1.6147488093330405e-05, + "loss": 1.2837, + "step": 2733 + }, + { + "epoch": 0.58, + "learning_rate": 1.6134117668072858e-05, + "loss": 1.2748, + "step": 2734 + }, + { + "epoch": 0.58, + "learning_rate": 1.612074903737585e-05, + "loss": 1.2685, + "step": 2735 + }, + { + "epoch": 0.58, + "learning_rate": 1.610738220744517e-05, + "loss": 1.2561, + "step": 2736 + }, + { + "epoch": 0.58, + "learning_rate": 1.6094017184485763e-05, + "loss": 1.2569, + "step": 2737 + }, + { + "epoch": 0.58, + "learning_rate": 1.6080653974701732e-05, + "loss": 1.2776, + "step": 2738 + }, + { + "epoch": 0.58, + "learning_rate": 1.6067292584296333e-05, + "loss": 1.2768, + "step": 2739 + }, + { + "epoch": 0.58, + "learning_rate": 1.6053933019472003e-05, + "loss": 1.2387, + "step": 2740 + }, + { + "epoch": 0.58, + "learning_rate": 1.6040575286430295e-05, + "loss": 1.2502, + "step": 2741 + }, + { + "epoch": 0.58, + "learning_rate": 1.602721939137195e-05, + "loss": 1.2123, + "step": 2742 + }, + { + "epoch": 0.58, + "learning_rate": 1.6013865340496826e-05, + "loss": 1.2785, + "step": 2743 + }, + { + "epoch": 0.58, + "learning_rate": 1.6000513140003927e-05, + "loss": 1.2757, + "step": 2744 + }, + { + "epoch": 0.58, + "learning_rate": 1.5987162796091428e-05, + "loss": 1.2975, + "step": 2745 + }, + { + "epoch": 0.58, + "learning_rate": 1.5973814314956602e-05, + "loss": 1.2783, + "step": 2746 + }, + { + "epoch": 0.58, + "learning_rate": 1.596046770279588e-05, + "loss": 1.2931, + "step": 2747 + }, + { + "epoch": 0.58, + "learning_rate": 1.5947122965804827e-05, + "loss": 1.3106, + "step": 2748 + }, + { + "epoch": 0.58, + "learning_rate": 1.5933780110178128e-05, + "loss": 1.2796, + "step": 2749 + }, + { + "epoch": 0.58, + "learning_rate": 1.59204391421096e-05, + "loss": 1.2381, + "step": 2750 + }, + { + "epoch": 0.58, + "learning_rate": 1.5907100067792186e-05, + "loss": 1.2646, + "step": 2751 + }, + { + "epoch": 0.58, + "learning_rate": 1.589376289341793e-05, + "loss": 1.266, + "step": 2752 + }, + { + "epoch": 0.58, + "learning_rate": 1.5880427625178035e-05, + "loss": 1.2513, + "step": 2753 + }, + { + "epoch": 0.58, + "learning_rate": 1.586709426926277e-05, + "loss": 1.2819, + "step": 2754 + }, + { + "epoch": 0.58, + "learning_rate": 1.5853762831861567e-05, + "loss": 1.2558, + "step": 2755 + }, + { + "epoch": 0.58, + "learning_rate": 1.5840433319162925e-05, + "loss": 1.2937, + "step": 2756 + }, + { + "epoch": 0.58, + "learning_rate": 1.5827105737354456e-05, + "loss": 1.2824, + "step": 2757 + }, + { + "epoch": 0.58, + "learning_rate": 1.5813780092622907e-05, + "loss": 1.261, + "step": 2758 + }, + { + "epoch": 0.58, + "learning_rate": 1.580045639115409e-05, + "loss": 1.2662, + "step": 2759 + }, + { + "epoch": 0.58, + "learning_rate": 1.5787134639132935e-05, + "loss": 1.3024, + "step": 2760 + }, + { + "epoch": 0.58, + "learning_rate": 1.577381484274346e-05, + "loss": 1.2585, + "step": 2761 + }, + { + "epoch": 0.58, + "learning_rate": 1.576049700816877e-05, + "loss": 1.2769, + "step": 2762 + }, + { + "epoch": 0.58, + "learning_rate": 1.574718114159108e-05, + "loss": 1.2846, + "step": 2763 + }, + { + "epoch": 0.58, + "learning_rate": 1.5733867249191667e-05, + "loss": 1.2085, + "step": 2764 + }, + { + "epoch": 0.58, + "learning_rate": 1.57205553371509e-05, + "loss": 1.2509, + "step": 2765 + }, + { + "epoch": 0.58, + "learning_rate": 1.570724541164824e-05, + "loss": 1.2795, + "step": 2766 + }, + { + "epoch": 0.58, + "learning_rate": 1.569393747886221e-05, + "loss": 1.2655, + "step": 2767 + }, + { + "epoch": 0.58, + "learning_rate": 1.5680631544970405e-05, + "loss": 1.2706, + "step": 2768 + }, + { + "epoch": 0.58, + "learning_rate": 1.5667327616149522e-05, + "loss": 1.2338, + "step": 2769 + }, + { + "epoch": 0.58, + "learning_rate": 1.5654025698575286e-05, + "loss": 1.3053, + "step": 2770 + }, + { + "epoch": 0.58, + "learning_rate": 1.5640725798422525e-05, + "loss": 1.2229, + "step": 2771 + }, + { + "epoch": 0.58, + "learning_rate": 1.5627427921865106e-05, + "loss": 1.2047, + "step": 2772 + }, + { + "epoch": 0.58, + "learning_rate": 1.5614132075075967e-05, + "loss": 1.2788, + "step": 2773 + }, + { + "epoch": 0.58, + "learning_rate": 1.5600838264227102e-05, + "loss": 1.2995, + "step": 2774 + }, + { + "epoch": 0.58, + "learning_rate": 1.5587546495489563e-05, + "loss": 1.2613, + "step": 2775 + }, + { + "epoch": 0.58, + "learning_rate": 1.557425677503344e-05, + "loss": 1.2044, + "step": 2776 + }, + { + "epoch": 0.58, + "learning_rate": 1.5560969109027896e-05, + "loss": 1.2386, + "step": 2777 + }, + { + "epoch": 0.58, + "learning_rate": 1.5547683503641115e-05, + "loss": 1.2889, + "step": 2778 + }, + { + "epoch": 0.58, + "learning_rate": 1.5534399965040353e-05, + "loss": 1.2513, + "step": 2779 + }, + { + "epoch": 0.58, + "learning_rate": 1.552111849939188e-05, + "loss": 1.2381, + "step": 2780 + }, + { + "epoch": 0.58, + "learning_rate": 1.550783911286101e-05, + "loss": 1.2871, + "step": 2781 + }, + { + "epoch": 0.59, + "learning_rate": 1.5494561811612102e-05, + "loss": 1.3133, + "step": 2782 + }, + { + "epoch": 0.59, + "learning_rate": 1.548128660180854e-05, + "loss": 1.2307, + "step": 2783 + }, + { + "epoch": 0.59, + "learning_rate": 1.5468013489612742e-05, + "loss": 1.2312, + "step": 2784 + }, + { + "epoch": 0.59, + "learning_rate": 1.5454742481186137e-05, + "loss": 1.3291, + "step": 2785 + }, + { + "epoch": 0.59, + "learning_rate": 1.5441473582689198e-05, + "loss": 1.2897, + "step": 2786 + }, + { + "epoch": 0.59, + "learning_rate": 1.5428206800281413e-05, + "loss": 1.2645, + "step": 2787 + }, + { + "epoch": 0.59, + "learning_rate": 1.5414942140121278e-05, + "loss": 1.2757, + "step": 2788 + }, + { + "epoch": 0.59, + "learning_rate": 1.54016796083663e-05, + "loss": 1.2955, + "step": 2789 + }, + { + "epoch": 0.59, + "learning_rate": 1.538841921117303e-05, + "loss": 1.2767, + "step": 2790 + }, + { + "epoch": 0.59, + "learning_rate": 1.5375160954696986e-05, + "loss": 1.219, + "step": 2791 + }, + { + "epoch": 0.59, + "learning_rate": 1.536190484509273e-05, + "loss": 1.2828, + "step": 2792 + }, + { + "epoch": 0.59, + "learning_rate": 1.5348650888513798e-05, + "loss": 1.1934, + "step": 2793 + }, + { + "epoch": 0.59, + "learning_rate": 1.533539909111273e-05, + "loss": 1.2265, + "step": 2794 + }, + { + "epoch": 0.59, + "learning_rate": 1.5322149459041097e-05, + "loss": 1.2764, + "step": 2795 + }, + { + "epoch": 0.59, + "learning_rate": 1.5308901998449415e-05, + "loss": 1.232, + "step": 2796 + }, + { + "epoch": 0.59, + "learning_rate": 1.5295656715487226e-05, + "loss": 1.2899, + "step": 2797 + }, + { + "epoch": 0.59, + "learning_rate": 1.5282413616303063e-05, + "loss": 1.3017, + "step": 2798 + }, + { + "epoch": 0.59, + "learning_rate": 1.526917270704441e-05, + "loss": 1.2194, + "step": 2799 + }, + { + "epoch": 0.59, + "learning_rate": 1.5255933993857785e-05, + "loss": 1.2998, + "step": 2800 + }, + { + "epoch": 0.59, + "learning_rate": 1.5242697482888649e-05, + "loss": 1.2646, + "step": 2801 + }, + { + "epoch": 0.59, + "learning_rate": 1.5229463180281441e-05, + "loss": 1.2445, + "step": 2802 + }, + { + "epoch": 0.59, + "learning_rate": 1.5216231092179604e-05, + "loss": 1.2482, + "step": 2803 + }, + { + "epoch": 0.59, + "learning_rate": 1.5203001224725525e-05, + "loss": 1.2311, + "step": 2804 + }, + { + "epoch": 0.59, + "learning_rate": 1.5189773584060563e-05, + "loss": 1.2904, + "step": 2805 + }, + { + "epoch": 0.59, + "learning_rate": 1.517654817632507e-05, + "loss": 1.2851, + "step": 2806 + }, + { + "epoch": 0.59, + "learning_rate": 1.5163325007658319e-05, + "loss": 1.2917, + "step": 2807 + }, + { + "epoch": 0.59, + "learning_rate": 1.5150104084198587e-05, + "loss": 1.2033, + "step": 2808 + }, + { + "epoch": 0.59, + "learning_rate": 1.5136885412083073e-05, + "loss": 1.255, + "step": 2809 + }, + { + "epoch": 0.59, + "learning_rate": 1.5123668997447948e-05, + "loss": 1.2871, + "step": 2810 + }, + { + "epoch": 0.59, + "learning_rate": 1.5110454846428348e-05, + "loss": 1.2719, + "step": 2811 + }, + { + "epoch": 0.59, + "learning_rate": 1.5097242965158322e-05, + "loss": 1.246, + "step": 2812 + }, + { + "epoch": 0.59, + "learning_rate": 1.5084033359770907e-05, + "loss": 1.2591, + "step": 2813 + }, + { + "epoch": 0.59, + "learning_rate": 1.5070826036398052e-05, + "loss": 1.2777, + "step": 2814 + }, + { + "epoch": 0.59, + "learning_rate": 1.505762100117065e-05, + "loss": 1.1695, + "step": 2815 + }, + { + "epoch": 0.59, + "learning_rate": 1.5044418260218559e-05, + "loss": 1.3016, + "step": 2816 + }, + { + "epoch": 0.59, + "learning_rate": 1.503121781967054e-05, + "loss": 1.3046, + "step": 2817 + }, + { + "epoch": 0.59, + "learning_rate": 1.5018019685654295e-05, + "loss": 1.2725, + "step": 2818 + }, + { + "epoch": 0.59, + "learning_rate": 1.5004823864296472e-05, + "loss": 1.2431, + "step": 2819 + }, + { + "epoch": 0.59, + "learning_rate": 1.4991630361722619e-05, + "loss": 1.2256, + "step": 2820 + }, + { + "epoch": 0.59, + "learning_rate": 1.4978439184057233e-05, + "loss": 1.202, + "step": 2821 + }, + { + "epoch": 0.59, + "learning_rate": 1.4965250337423718e-05, + "loss": 1.2295, + "step": 2822 + }, + { + "epoch": 0.59, + "learning_rate": 1.4952063827944385e-05, + "loss": 1.283, + "step": 2823 + }, + { + "epoch": 0.59, + "learning_rate": 1.4938879661740495e-05, + "loss": 1.2384, + "step": 2824 + }, + { + "epoch": 0.59, + "learning_rate": 1.4925697844932185e-05, + "loss": 1.2484, + "step": 2825 + }, + { + "epoch": 0.59, + "learning_rate": 1.4912518383638512e-05, + "loss": 1.2424, + "step": 2826 + }, + { + "epoch": 0.59, + "learning_rate": 1.4899341283977457e-05, + "loss": 1.2382, + "step": 2827 + }, + { + "epoch": 0.59, + "learning_rate": 1.4886166552065873e-05, + "loss": 1.2815, + "step": 2828 + }, + { + "epoch": 0.6, + "learning_rate": 1.4872994194019553e-05, + "loss": 1.2739, + "step": 2829 + }, + { + "epoch": 0.6, + "learning_rate": 1.4859824215953154e-05, + "loss": 1.2352, + "step": 2830 + }, + { + "epoch": 0.6, + "learning_rate": 1.4846656623980234e-05, + "loss": 1.211, + "step": 2831 + }, + { + "epoch": 0.6, + "learning_rate": 1.4833491424213268e-05, + "loss": 1.1948, + "step": 2832 + }, + { + "epoch": 0.6, + "learning_rate": 1.4820328622763584e-05, + "loss": 1.2689, + "step": 2833 + }, + { + "epoch": 0.6, + "learning_rate": 1.4807168225741433e-05, + "loss": 1.2698, + "step": 2834 + }, + { + "epoch": 0.6, + "learning_rate": 1.4794010239255925e-05, + "loss": 1.2735, + "step": 2835 + }, + { + "epoch": 0.6, + "learning_rate": 1.4780854669415053e-05, + "loss": 1.2473, + "step": 2836 + }, + { + "epoch": 0.6, + "learning_rate": 1.4767701522325708e-05, + "loss": 1.3154, + "step": 2837 + }, + { + "epoch": 0.6, + "learning_rate": 1.4754550804093633e-05, + "loss": 1.228, + "step": 2838 + }, + { + "epoch": 0.6, + "learning_rate": 1.4741402520823442e-05, + "loss": 1.2206, + "step": 2839 + }, + { + "epoch": 0.6, + "learning_rate": 1.4728256678618652e-05, + "loss": 1.2807, + "step": 2840 + }, + { + "epoch": 0.6, + "learning_rate": 1.47151132835816e-05, + "loss": 1.2791, + "step": 2841 + }, + { + "epoch": 0.6, + "learning_rate": 1.4701972341813533e-05, + "loss": 1.23, + "step": 2842 + }, + { + "epoch": 0.6, + "learning_rate": 1.4688833859414529e-05, + "loss": 1.2019, + "step": 2843 + }, + { + "epoch": 0.6, + "learning_rate": 1.467569784248352e-05, + "loss": 1.263, + "step": 2844 + }, + { + "epoch": 0.6, + "learning_rate": 1.4662564297118325e-05, + "loss": 1.2614, + "step": 2845 + }, + { + "epoch": 0.6, + "learning_rate": 1.4649433229415588e-05, + "loss": 1.2007, + "step": 2846 + }, + { + "epoch": 0.6, + "learning_rate": 1.4636304645470807e-05, + "loss": 1.2403, + "step": 2847 + }, + { + "epoch": 0.6, + "learning_rate": 1.4623178551378346e-05, + "loss": 1.2279, + "step": 2848 + }, + { + "epoch": 0.6, + "learning_rate": 1.4610054953231379e-05, + "loss": 1.2979, + "step": 2849 + }, + { + "epoch": 0.6, + "learning_rate": 1.4596933857121963e-05, + "loss": 1.2706, + "step": 2850 + }, + { + "epoch": 0.6, + "learning_rate": 1.4583815269140957e-05, + "loss": 1.2874, + "step": 2851 + }, + { + "epoch": 0.6, + "learning_rate": 1.4570699195378071e-05, + "loss": 1.2471, + "step": 2852 + }, + { + "epoch": 0.6, + "learning_rate": 1.4557585641921859e-05, + "loss": 1.2704, + "step": 2853 + }, + { + "epoch": 0.6, + "learning_rate": 1.4544474614859683e-05, + "loss": 1.2611, + "step": 2854 + }, + { + "epoch": 0.6, + "learning_rate": 1.4531366120277736e-05, + "loss": 1.2439, + "step": 2855 + }, + { + "epoch": 0.6, + "learning_rate": 1.4518260164261058e-05, + "loss": 1.273, + "step": 2856 + }, + { + "epoch": 0.6, + "learning_rate": 1.4505156752893488e-05, + "loss": 1.2756, + "step": 2857 + }, + { + "epoch": 0.6, + "learning_rate": 1.4492055892257688e-05, + "loss": 1.2911, + "step": 2858 + }, + { + "epoch": 0.6, + "learning_rate": 1.4478957588435148e-05, + "loss": 1.2161, + "step": 2859 + }, + { + "epoch": 0.6, + "learning_rate": 1.4465861847506142e-05, + "loss": 1.2703, + "step": 2860 + }, + { + "epoch": 0.6, + "learning_rate": 1.4452768675549798e-05, + "loss": 1.2377, + "step": 2861 + }, + { + "epoch": 0.6, + "learning_rate": 1.4439678078644004e-05, + "loss": 1.2483, + "step": 2862 + }, + { + "epoch": 0.6, + "learning_rate": 1.4426590062865497e-05, + "loss": 1.242, + "step": 2863 + }, + { + "epoch": 0.6, + "learning_rate": 1.4413504634289785e-05, + "loss": 1.2969, + "step": 2864 + }, + { + "epoch": 0.6, + "learning_rate": 1.4400421798991178e-05, + "loss": 1.2291, + "step": 2865 + }, + { + "epoch": 0.6, + "learning_rate": 1.4387341563042801e-05, + "loss": 1.2642, + "step": 2866 + }, + { + "epoch": 0.6, + "learning_rate": 1.4374263932516557e-05, + "loss": 1.3157, + "step": 2867 + }, + { + "epoch": 0.6, + "learning_rate": 1.4361188913483132e-05, + "loss": 1.2582, + "step": 2868 + }, + { + "epoch": 0.6, + "learning_rate": 1.4348116512012024e-05, + "loss": 1.2395, + "step": 2869 + }, + { + "epoch": 0.6, + "learning_rate": 1.4335046734171499e-05, + "loss": 1.2779, + "step": 2870 + }, + { + "epoch": 0.6, + "learning_rate": 1.4321979586028607e-05, + "loss": 1.2751, + "step": 2871 + }, + { + "epoch": 0.6, + "learning_rate": 1.4308915073649182e-05, + "loss": 1.2613, + "step": 2872 + }, + { + "epoch": 0.6, + "learning_rate": 1.4295853203097823e-05, + "loss": 1.2756, + "step": 2873 + }, + { + "epoch": 0.6, + "learning_rate": 1.4282793980437923e-05, + "loss": 1.2659, + "step": 2874 + }, + { + "epoch": 0.6, + "learning_rate": 1.4269737411731627e-05, + "loss": 1.3003, + "step": 2875 + }, + { + "epoch": 0.6, + "learning_rate": 1.425668350303985e-05, + "loss": 1.237, + "step": 2876 + }, + { + "epoch": 0.61, + "learning_rate": 1.4243632260422292e-05, + "loss": 1.2457, + "step": 2877 + }, + { + "epoch": 0.61, + "learning_rate": 1.4230583689937381e-05, + "loss": 1.2737, + "step": 2878 + }, + { + "epoch": 0.61, + "learning_rate": 1.4217537797642343e-05, + "loss": 1.2034, + "step": 2879 + }, + { + "epoch": 0.61, + "learning_rate": 1.4204494589593127e-05, + "loss": 1.2646, + "step": 2880 + }, + { + "epoch": 0.61, + "learning_rate": 1.4191454071844457e-05, + "loss": 1.2501, + "step": 2881 + }, + { + "epoch": 0.61, + "learning_rate": 1.4178416250449804e-05, + "loss": 1.2616, + "step": 2882 + }, + { + "epoch": 0.61, + "learning_rate": 1.4165381131461388e-05, + "loss": 1.2314, + "step": 2883 + }, + { + "epoch": 0.61, + "learning_rate": 1.4152348720930156e-05, + "loss": 1.2869, + "step": 2884 + }, + { + "epoch": 0.61, + "learning_rate": 1.4139319024905836e-05, + "loss": 1.2607, + "step": 2885 + }, + { + "epoch": 0.61, + "learning_rate": 1.412629204943685e-05, + "loss": 1.2817, + "step": 2886 + }, + { + "epoch": 0.61, + "learning_rate": 1.4113267800570402e-05, + "loss": 1.2456, + "step": 2887 + }, + { + "epoch": 0.61, + "learning_rate": 1.41002462843524e-05, + "loss": 1.2291, + "step": 2888 + }, + { + "epoch": 0.61, + "learning_rate": 1.4087227506827482e-05, + "loss": 1.2301, + "step": 2889 + }, + { + "epoch": 0.61, + "learning_rate": 1.4074211474039046e-05, + "loss": 1.2349, + "step": 2890 + }, + { + "epoch": 0.61, + "learning_rate": 1.406119819202917e-05, + "loss": 1.2626, + "step": 2891 + }, + { + "epoch": 0.61, + "learning_rate": 1.4048187666838707e-05, + "loss": 1.2817, + "step": 2892 + }, + { + "epoch": 0.61, + "learning_rate": 1.4035179904507184e-05, + "loss": 1.3066, + "step": 2893 + }, + { + "epoch": 0.61, + "learning_rate": 1.4022174911072868e-05, + "loss": 1.2531, + "step": 2894 + }, + { + "epoch": 0.61, + "learning_rate": 1.4009172692572743e-05, + "loss": 1.2722, + "step": 2895 + }, + { + "epoch": 0.61, + "learning_rate": 1.39961732550425e-05, + "loss": 1.2629, + "step": 2896 + }, + { + "epoch": 0.61, + "learning_rate": 1.3983176604516526e-05, + "loss": 1.2283, + "step": 2897 + }, + { + "epoch": 0.61, + "learning_rate": 1.3970182747027944e-05, + "loss": 1.3009, + "step": 2898 + }, + { + "epoch": 0.61, + "learning_rate": 1.3957191688608544e-05, + "loss": 1.2429, + "step": 2899 + }, + { + "epoch": 0.61, + "learning_rate": 1.3944203435288857e-05, + "loss": 1.2579, + "step": 2900 + }, + { + "epoch": 0.61, + "learning_rate": 1.3931217993098076e-05, + "loss": 1.2907, + "step": 2901 + }, + { + "epoch": 0.61, + "learning_rate": 1.3918235368064102e-05, + "loss": 1.2069, + "step": 2902 + }, + { + "epoch": 0.61, + "learning_rate": 1.3905255566213542e-05, + "loss": 1.2855, + "step": 2903 + }, + { + "epoch": 0.61, + "learning_rate": 1.3892278593571669e-05, + "loss": 1.2529, + "step": 2904 + }, + { + "epoch": 0.61, + "learning_rate": 1.3879304456162457e-05, + "loss": 1.215, + "step": 2905 + }, + { + "epoch": 0.61, + "learning_rate": 1.3866333160008562e-05, + "loss": 1.2576, + "step": 2906 + }, + { + "epoch": 0.61, + "learning_rate": 1.3853364711131324e-05, + "loss": 1.1791, + "step": 2907 + }, + { + "epoch": 0.61, + "learning_rate": 1.3840399115550748e-05, + "loss": 1.2156, + "step": 2908 + }, + { + "epoch": 0.61, + "learning_rate": 1.3827436379285537e-05, + "loss": 1.2298, + "step": 2909 + }, + { + "epoch": 0.61, + "learning_rate": 1.3814476508353036e-05, + "loss": 1.2701, + "step": 2910 + }, + { + "epoch": 0.61, + "learning_rate": 1.3801519508769295e-05, + "loss": 1.253, + "step": 2911 + }, + { + "epoch": 0.61, + "learning_rate": 1.3788565386548996e-05, + "loss": 1.2261, + "step": 2912 + }, + { + "epoch": 0.61, + "learning_rate": 1.3775614147705521e-05, + "loss": 1.2228, + "step": 2913 + }, + { + "epoch": 0.61, + "learning_rate": 1.3762665798250887e-05, + "loss": 1.263, + "step": 2914 + }, + { + "epoch": 0.61, + "learning_rate": 1.3749720344195768e-05, + "loss": 1.243, + "step": 2915 + }, + { + "epoch": 0.61, + "learning_rate": 1.373677779154952e-05, + "loss": 1.2675, + "step": 2916 + }, + { + "epoch": 0.61, + "learning_rate": 1.3723838146320128e-05, + "loss": 1.2731, + "step": 2917 + }, + { + "epoch": 0.61, + "learning_rate": 1.3710901414514235e-05, + "loss": 1.2577, + "step": 2918 + }, + { + "epoch": 0.61, + "learning_rate": 1.3697967602137135e-05, + "loss": 1.3023, + "step": 2919 + }, + { + "epoch": 0.61, + "learning_rate": 1.368503671519276e-05, + "loss": 1.2115, + "step": 2920 + }, + { + "epoch": 0.61, + "learning_rate": 1.3672108759683694e-05, + "loss": 1.2979, + "step": 2921 + }, + { + "epoch": 0.61, + "learning_rate": 1.3659183741611154e-05, + "loss": 1.2026, + "step": 2922 + }, + { + "epoch": 0.61, + "learning_rate": 1.3646261666974976e-05, + "loss": 1.2819, + "step": 2923 + }, + { + "epoch": 0.62, + "learning_rate": 1.3633342541773673e-05, + "loss": 1.2395, + "step": 2924 + }, + { + "epoch": 0.62, + "learning_rate": 1.3620426372004353e-05, + "loss": 1.2855, + "step": 2925 + }, + { + "epoch": 0.62, + "learning_rate": 1.360751316366275e-05, + "loss": 1.2285, + "step": 2926 + }, + { + "epoch": 0.62, + "learning_rate": 1.3594602922743252e-05, + "loss": 1.2752, + "step": 2927 + }, + { + "epoch": 0.62, + "learning_rate": 1.358169565523884e-05, + "loss": 1.2844, + "step": 2928 + }, + { + "epoch": 0.62, + "learning_rate": 1.356879136714114e-05, + "loss": 1.2648, + "step": 2929 + }, + { + "epoch": 0.62, + "learning_rate": 1.3555890064440374e-05, + "loss": 1.2449, + "step": 2930 + }, + { + "epoch": 0.62, + "learning_rate": 1.3542991753125387e-05, + "loss": 1.2562, + "step": 2931 + }, + { + "epoch": 0.62, + "learning_rate": 1.3530096439183637e-05, + "loss": 1.2648, + "step": 2932 + }, + { + "epoch": 0.62, + "learning_rate": 1.3517204128601193e-05, + "loss": 1.3028, + "step": 2933 + }, + { + "epoch": 0.62, + "learning_rate": 1.3504314827362715e-05, + "loss": 1.2734, + "step": 2934 + }, + { + "epoch": 0.62, + "learning_rate": 1.3491428541451487e-05, + "loss": 1.2486, + "step": 2935 + }, + { + "epoch": 0.62, + "learning_rate": 1.3478545276849373e-05, + "loss": 1.2236, + "step": 2936 + }, + { + "epoch": 0.62, + "learning_rate": 1.3465665039536857e-05, + "loss": 1.2125, + "step": 2937 + }, + { + "epoch": 0.62, + "learning_rate": 1.3452787835492998e-05, + "loss": 1.2841, + "step": 2938 + }, + { + "epoch": 0.62, + "learning_rate": 1.3439913670695445e-05, + "loss": 1.272, + "step": 2939 + }, + { + "epoch": 0.62, + "learning_rate": 1.3427042551120461e-05, + "loss": 1.2461, + "step": 2940 + }, + { + "epoch": 0.62, + "learning_rate": 1.3414174482742865e-05, + "loss": 1.2438, + "step": 2941 + }, + { + "epoch": 0.62, + "learning_rate": 1.3401309471536092e-05, + "loss": 1.3218, + "step": 2942 + }, + { + "epoch": 0.62, + "learning_rate": 1.3388447523472122e-05, + "loss": 1.2427, + "step": 2943 + }, + { + "epoch": 0.62, + "learning_rate": 1.337558864452154e-05, + "loss": 1.2186, + "step": 2944 + }, + { + "epoch": 0.62, + "learning_rate": 1.3362732840653494e-05, + "loss": 1.281, + "step": 2945 + }, + { + "epoch": 0.62, + "learning_rate": 1.3349880117835716e-05, + "loss": 1.2928, + "step": 2946 + }, + { + "epoch": 0.62, + "learning_rate": 1.3337030482034485e-05, + "loss": 1.238, + "step": 2947 + }, + { + "epoch": 0.62, + "learning_rate": 1.332418393921468e-05, + "loss": 1.2709, + "step": 2948 + }, + { + "epoch": 0.62, + "learning_rate": 1.3311340495339704e-05, + "loss": 1.2721, + "step": 2949 + }, + { + "epoch": 0.62, + "learning_rate": 1.3298500156371565e-05, + "loss": 1.2556, + "step": 2950 + }, + { + "epoch": 0.62, + "learning_rate": 1.32856629282708e-05, + "loss": 1.2359, + "step": 2951 + }, + { + "epoch": 0.62, + "learning_rate": 1.3272828816996498e-05, + "loss": 1.1736, + "step": 2952 + }, + { + "epoch": 0.62, + "learning_rate": 1.3259997828506333e-05, + "loss": 1.236, + "step": 2953 + }, + { + "epoch": 0.62, + "learning_rate": 1.3247169968756494e-05, + "loss": 1.2504, + "step": 2954 + }, + { + "epoch": 0.62, + "learning_rate": 1.3234345243701743e-05, + "loss": 1.2845, + "step": 2955 + }, + { + "epoch": 0.62, + "learning_rate": 1.322152365929537e-05, + "loss": 1.3128, + "step": 2956 + }, + { + "epoch": 0.62, + "learning_rate": 1.320870522148921e-05, + "loss": 1.2583, + "step": 2957 + }, + { + "epoch": 0.62, + "learning_rate": 1.3195889936233662e-05, + "loss": 1.2285, + "step": 2958 + }, + { + "epoch": 0.62, + "learning_rate": 1.3183077809477625e-05, + "loss": 1.244, + "step": 2959 + }, + { + "epoch": 0.62, + "learning_rate": 1.3170268847168541e-05, + "loss": 1.2554, + "step": 2960 + }, + { + "epoch": 0.62, + "learning_rate": 1.315746305525241e-05, + "loss": 1.2997, + "step": 2961 + }, + { + "epoch": 0.62, + "learning_rate": 1.3144660439673727e-05, + "loss": 1.2706, + "step": 2962 + }, + { + "epoch": 0.62, + "learning_rate": 1.3131861006375519e-05, + "loss": 1.2711, + "step": 2963 + }, + { + "epoch": 0.62, + "learning_rate": 1.311906476129936e-05, + "loss": 1.2799, + "step": 2964 + }, + { + "epoch": 0.62, + "learning_rate": 1.3106271710385312e-05, + "loss": 1.2707, + "step": 2965 + }, + { + "epoch": 0.62, + "learning_rate": 1.3093481859571981e-05, + "loss": 1.2544, + "step": 2966 + }, + { + "epoch": 0.62, + "learning_rate": 1.3080695214796464e-05, + "loss": 1.3039, + "step": 2967 + }, + { + "epoch": 0.62, + "learning_rate": 1.3067911781994384e-05, + "loss": 1.268, + "step": 2968 + }, + { + "epoch": 0.62, + "learning_rate": 1.3055131567099872e-05, + "loss": 1.2769, + "step": 2969 + }, + { + "epoch": 0.62, + "learning_rate": 1.3042354576045559e-05, + "loss": 1.2833, + "step": 2970 + }, + { + "epoch": 0.62, + "learning_rate": 1.3029580814762596e-05, + "loss": 1.3312, + "step": 2971 + }, + { + "epoch": 0.63, + "learning_rate": 1.3016810289180615e-05, + "loss": 1.2674, + "step": 2972 + }, + { + "epoch": 0.63, + "learning_rate": 1.3004043005227742e-05, + "loss": 1.2727, + "step": 2973 + }, + { + "epoch": 0.63, + "learning_rate": 1.2991278968830625e-05, + "loss": 1.2411, + "step": 2974 + }, + { + "epoch": 0.63, + "learning_rate": 1.2978518185914388e-05, + "loss": 1.2056, + "step": 2975 + }, + { + "epoch": 0.63, + "learning_rate": 1.296576066240263e-05, + "loss": 1.2812, + "step": 2976 + }, + { + "epoch": 0.63, + "learning_rate": 1.2953006404217474e-05, + "loss": 1.2758, + "step": 2977 + }, + { + "epoch": 0.63, + "learning_rate": 1.2940255417279486e-05, + "loss": 1.2314, + "step": 2978 + }, + { + "epoch": 0.63, + "learning_rate": 1.2927507707507751e-05, + "loss": 1.2708, + "step": 2979 + }, + { + "epoch": 0.63, + "learning_rate": 1.2914763280819804e-05, + "loss": 1.2673, + "step": 2980 + }, + { + "epoch": 0.63, + "learning_rate": 1.2902022143131668e-05, + "loss": 1.2948, + "step": 2981 + }, + { + "epoch": 0.63, + "learning_rate": 1.2889284300357847e-05, + "loss": 1.19, + "step": 2982 + }, + { + "epoch": 0.63, + "learning_rate": 1.28765497584113e-05, + "loss": 1.2836, + "step": 2983 + }, + { + "epoch": 0.63, + "learning_rate": 1.2863818523203452e-05, + "loss": 1.2801, + "step": 2984 + }, + { + "epoch": 0.63, + "learning_rate": 1.2851090600644223e-05, + "loss": 1.2242, + "step": 2985 + }, + { + "epoch": 0.63, + "learning_rate": 1.2838365996641949e-05, + "loss": 1.214, + "step": 2986 + }, + { + "epoch": 0.63, + "learning_rate": 1.2825644717103472e-05, + "loss": 1.2517, + "step": 2987 + }, + { + "epoch": 0.63, + "learning_rate": 1.281292676793406e-05, + "loss": 1.2531, + "step": 2988 + }, + { + "epoch": 0.63, + "learning_rate": 1.2800212155037437e-05, + "loss": 1.2424, + "step": 2989 + }, + { + "epoch": 0.63, + "learning_rate": 1.27875008843158e-05, + "loss": 1.2163, + "step": 2990 + }, + { + "epoch": 0.63, + "learning_rate": 1.2774792961669764e-05, + "loss": 1.2368, + "step": 2991 + }, + { + "epoch": 0.63, + "learning_rate": 1.2762088392998417e-05, + "loss": 1.2562, + "step": 2992 + }, + { + "epoch": 0.63, + "learning_rate": 1.2749387184199283e-05, + "loss": 1.2586, + "step": 2993 + }, + { + "epoch": 0.63, + "learning_rate": 1.2736689341168304e-05, + "loss": 1.2376, + "step": 2994 + }, + { + "epoch": 0.63, + "learning_rate": 1.2723994869799898e-05, + "loss": 1.276, + "step": 2995 + }, + { + "epoch": 0.63, + "learning_rate": 1.2711303775986888e-05, + "loss": 1.2508, + "step": 2996 + }, + { + "epoch": 0.63, + "learning_rate": 1.2698616065620528e-05, + "loss": 1.2692, + "step": 2997 + }, + { + "epoch": 0.63, + "learning_rate": 1.2685931744590536e-05, + "loss": 1.2778, + "step": 2998 + }, + { + "epoch": 0.63, + "learning_rate": 1.267325081878501e-05, + "loss": 1.2686, + "step": 2999 + }, + { + "epoch": 0.63, + "learning_rate": 1.2660573294090512e-05, + "loss": 1.1985, + "step": 3000 + }, + { + "epoch": 0.63, + "learning_rate": 1.2647899176392003e-05, + "loss": 1.2766, + "step": 3001 + }, + { + "epoch": 0.63, + "learning_rate": 1.2635228471572853e-05, + "loss": 1.2287, + "step": 3002 + }, + { + "epoch": 0.63, + "learning_rate": 1.2622561185514886e-05, + "loss": 1.2832, + "step": 3003 + }, + { + "epoch": 0.63, + "learning_rate": 1.2609897324098297e-05, + "loss": 1.2815, + "step": 3004 + }, + { + "epoch": 0.63, + "learning_rate": 1.2597236893201712e-05, + "loss": 1.2282, + "step": 3005 + }, + { + "epoch": 0.63, + "learning_rate": 1.2584579898702175e-05, + "loss": 1.2733, + "step": 3006 + }, + { + "epoch": 0.63, + "learning_rate": 1.25719263464751e-05, + "loss": 1.2478, + "step": 3007 + }, + { + "epoch": 0.63, + "learning_rate": 1.2559276242394347e-05, + "loss": 1.2505, + "step": 3008 + }, + { + "epoch": 0.63, + "learning_rate": 1.254662959233214e-05, + "loss": 1.2536, + "step": 3009 + }, + { + "epoch": 0.63, + "learning_rate": 1.2533986402159113e-05, + "loss": 1.226, + "step": 3010 + }, + { + "epoch": 0.63, + "learning_rate": 1.25213466777443e-05, + "loss": 1.2879, + "step": 3011 + }, + { + "epoch": 0.63, + "learning_rate": 1.2508710424955117e-05, + "loss": 1.2379, + "step": 3012 + }, + { + "epoch": 0.63, + "learning_rate": 1.249607764965736e-05, + "loss": 1.2265, + "step": 3013 + }, + { + "epoch": 0.63, + "learning_rate": 1.2483448357715242e-05, + "loss": 1.2394, + "step": 3014 + }, + { + "epoch": 0.63, + "learning_rate": 1.2470822554991321e-05, + "loss": 1.2516, + "step": 3015 + }, + { + "epoch": 0.63, + "learning_rate": 1.2458200247346569e-05, + "loss": 1.2821, + "step": 3016 + }, + { + "epoch": 0.63, + "learning_rate": 1.2445581440640312e-05, + "loss": 1.2852, + "step": 3017 + }, + { + "epoch": 0.63, + "learning_rate": 1.243296614073025e-05, + "loss": 1.1998, + "step": 3018 + }, + { + "epoch": 0.64, + "learning_rate": 1.2420354353472483e-05, + "loss": 1.25, + "step": 3019 + }, + { + "epoch": 0.64, + "learning_rate": 1.2407746084721444e-05, + "loss": 1.2859, + "step": 3020 + }, + { + "epoch": 0.64, + "learning_rate": 1.2395141340329966e-05, + "loss": 1.2565, + "step": 3021 + }, + { + "epoch": 0.64, + "learning_rate": 1.2382540126149218e-05, + "loss": 1.2491, + "step": 3022 + }, + { + "epoch": 0.64, + "learning_rate": 1.2369942448028738e-05, + "loss": 1.2762, + "step": 3023 + }, + { + "epoch": 0.64, + "learning_rate": 1.2357348311816444e-05, + "loss": 1.2898, + "step": 3024 + }, + { + "epoch": 0.64, + "learning_rate": 1.2344757723358583e-05, + "loss": 1.2155, + "step": 3025 + }, + { + "epoch": 0.64, + "learning_rate": 1.2332170688499753e-05, + "loss": 1.2094, + "step": 3026 + }, + { + "epoch": 0.64, + "learning_rate": 1.2319587213082931e-05, + "loss": 1.2514, + "step": 3027 + }, + { + "epoch": 0.64, + "learning_rate": 1.230700730294942e-05, + "loss": 1.2304, + "step": 3028 + }, + { + "epoch": 0.64, + "learning_rate": 1.229443096393887e-05, + "loss": 1.2681, + "step": 3029 + }, + { + "epoch": 0.64, + "learning_rate": 1.2281858201889283e-05, + "loss": 1.2783, + "step": 3030 + }, + { + "epoch": 0.64, + "learning_rate": 1.226928902263698e-05, + "loss": 1.2335, + "step": 3031 + }, + { + "epoch": 0.64, + "learning_rate": 1.2256723432016648e-05, + "loss": 1.2521, + "step": 3032 + }, + { + "epoch": 0.64, + "learning_rate": 1.2244161435861286e-05, + "loss": 1.2283, + "step": 3033 + }, + { + "epoch": 0.64, + "learning_rate": 1.2231603040002225e-05, + "loss": 1.2738, + "step": 3034 + }, + { + "epoch": 0.64, + "learning_rate": 1.2219048250269141e-05, + "loss": 1.2477, + "step": 3035 + }, + { + "epoch": 0.64, + "learning_rate": 1.2206497072490014e-05, + "loss": 1.2762, + "step": 3036 + }, + { + "epoch": 0.64, + "learning_rate": 1.2193949512491172e-05, + "loss": 1.1641, + "step": 3037 + }, + { + "epoch": 0.64, + "learning_rate": 1.2181405576097247e-05, + "loss": 1.2404, + "step": 3038 + }, + { + "epoch": 0.64, + "learning_rate": 1.2168865269131182e-05, + "loss": 1.2503, + "step": 3039 + }, + { + "epoch": 0.64, + "learning_rate": 1.2156328597414258e-05, + "loss": 1.2949, + "step": 3040 + }, + { + "epoch": 0.64, + "learning_rate": 1.2143795566766054e-05, + "loss": 1.2353, + "step": 3041 + }, + { + "epoch": 0.64, + "learning_rate": 1.2131266183004455e-05, + "loss": 1.2002, + "step": 3042 + }, + { + "epoch": 0.64, + "learning_rate": 1.2118740451945668e-05, + "loss": 1.3042, + "step": 3043 + }, + { + "epoch": 0.64, + "learning_rate": 1.2106218379404187e-05, + "loss": 1.2446, + "step": 3044 + }, + { + "epoch": 0.64, + "learning_rate": 1.2093699971192825e-05, + "loss": 1.2387, + "step": 3045 + }, + { + "epoch": 0.64, + "learning_rate": 1.208118523312268e-05, + "loss": 1.2728, + "step": 3046 + }, + { + "epoch": 0.64, + "learning_rate": 1.2068674171003146e-05, + "loss": 1.2408, + "step": 3047 + }, + { + "epoch": 0.64, + "learning_rate": 1.205616679064193e-05, + "loss": 1.2971, + "step": 3048 + }, + { + "epoch": 0.64, + "learning_rate": 1.2043663097844999e-05, + "loss": 1.225, + "step": 3049 + }, + { + "epoch": 0.64, + "learning_rate": 1.2031163098416644e-05, + "loss": 1.2419, + "step": 3050 + }, + { + "epoch": 0.64, + "learning_rate": 1.2018666798159408e-05, + "loss": 1.2404, + "step": 3051 + }, + { + "epoch": 0.64, + "learning_rate": 1.2006174202874141e-05, + "loss": 1.2231, + "step": 3052 + }, + { + "epoch": 0.64, + "learning_rate": 1.1993685318359956e-05, + "loss": 1.2834, + "step": 3053 + }, + { + "epoch": 0.64, + "learning_rate": 1.1981200150414262e-05, + "loss": 1.2667, + "step": 3054 + }, + { + "epoch": 0.64, + "learning_rate": 1.1968718704832716e-05, + "loss": 1.2504, + "step": 3055 + }, + { + "epoch": 0.64, + "learning_rate": 1.195624098740928e-05, + "loss": 1.3046, + "step": 3056 + }, + { + "epoch": 0.64, + "learning_rate": 1.1943767003936152e-05, + "loss": 1.3131, + "step": 3057 + }, + { + "epoch": 0.64, + "learning_rate": 1.1931296760203831e-05, + "loss": 1.1847, + "step": 3058 + }, + { + "epoch": 0.64, + "learning_rate": 1.1918830262001055e-05, + "loss": 1.2474, + "step": 3059 + }, + { + "epoch": 0.64, + "learning_rate": 1.1906367515114816e-05, + "loss": 1.2154, + "step": 3060 + }, + { + "epoch": 0.64, + "learning_rate": 1.1893908525330401e-05, + "loss": 1.2043, + "step": 3061 + }, + { + "epoch": 0.64, + "learning_rate": 1.1881453298431323e-05, + "loss": 1.2553, + "step": 3062 + }, + { + "epoch": 0.64, + "learning_rate": 1.1869001840199345e-05, + "loss": 1.2315, + "step": 3063 + }, + { + "epoch": 0.64, + "learning_rate": 1.1856554156414503e-05, + "loss": 1.2944, + "step": 3064 + }, + { + "epoch": 0.64, + "learning_rate": 1.1844110252855072e-05, + "loss": 1.2535, + "step": 3065 + }, + { + "epoch": 0.64, + "learning_rate": 1.1831670135297564e-05, + "loss": 1.2273, + "step": 3066 + }, + { + "epoch": 0.65, + "learning_rate": 1.1819233809516746e-05, + "loss": 1.2651, + "step": 3067 + }, + { + "epoch": 0.65, + "learning_rate": 1.1806801281285604e-05, + "loss": 1.2971, + "step": 3068 + }, + { + "epoch": 0.65, + "learning_rate": 1.1794372556375392e-05, + "loss": 1.1915, + "step": 3069 + }, + { + "epoch": 0.65, + "learning_rate": 1.178194764055557e-05, + "loss": 1.2387, + "step": 3070 + }, + { + "epoch": 0.65, + "learning_rate": 1.176952653959385e-05, + "loss": 1.2562, + "step": 3071 + }, + { + "epoch": 0.65, + "learning_rate": 1.1757109259256163e-05, + "loss": 1.2709, + "step": 3072 + }, + { + "epoch": 0.65, + "learning_rate": 1.174469580530666e-05, + "loss": 1.2709, + "step": 3073 + }, + { + "epoch": 0.65, + "learning_rate": 1.1732286183507738e-05, + "loss": 1.2748, + "step": 3074 + }, + { + "epoch": 0.65, + "learning_rate": 1.1719880399619987e-05, + "loss": 1.2853, + "step": 3075 + }, + { + "epoch": 0.65, + "learning_rate": 1.1707478459402236e-05, + "loss": 1.2439, + "step": 3076 + }, + { + "epoch": 0.65, + "learning_rate": 1.1695080368611526e-05, + "loss": 1.2483, + "step": 3077 + }, + { + "epoch": 0.65, + "learning_rate": 1.1682686133003105e-05, + "loss": 1.2746, + "step": 3078 + }, + { + "epoch": 0.65, + "learning_rate": 1.167029575833044e-05, + "loss": 1.2671, + "step": 3079 + }, + { + "epoch": 0.65, + "learning_rate": 1.1657909250345194e-05, + "loss": 1.262, + "step": 3080 + }, + { + "epoch": 0.65, + "learning_rate": 1.1645526614797235e-05, + "loss": 1.233, + "step": 3081 + }, + { + "epoch": 0.65, + "learning_rate": 1.1633147857434658e-05, + "loss": 1.2542, + "step": 3082 + }, + { + "epoch": 0.65, + "learning_rate": 1.1620772984003724e-05, + "loss": 1.1922, + "step": 3083 + }, + { + "epoch": 0.65, + "learning_rate": 1.1608402000248908e-05, + "loss": 1.2216, + "step": 3084 + }, + { + "epoch": 0.65, + "learning_rate": 1.1596034911912896e-05, + "loss": 1.2636, + "step": 3085 + }, + { + "epoch": 0.65, + "learning_rate": 1.1583671724736526e-05, + "loss": 1.2652, + "step": 3086 + }, + { + "epoch": 0.65, + "learning_rate": 1.157131244445886e-05, + "loss": 1.2796, + "step": 3087 + }, + { + "epoch": 0.65, + "learning_rate": 1.1558957076817135e-05, + "loss": 1.2416, + "step": 3088 + }, + { + "epoch": 0.65, + "learning_rate": 1.1546605627546752e-05, + "loss": 1.2384, + "step": 3089 + }, + { + "epoch": 0.65, + "learning_rate": 1.1534258102381332e-05, + "loss": 1.2593, + "step": 3090 + }, + { + "epoch": 0.65, + "learning_rate": 1.1521914507052646e-05, + "loss": 1.2582, + "step": 3091 + }, + { + "epoch": 0.65, + "learning_rate": 1.1509574847290641e-05, + "loss": 1.2765, + "step": 3092 + }, + { + "epoch": 0.65, + "learning_rate": 1.1497239128823456e-05, + "loss": 1.2534, + "step": 3093 + }, + { + "epoch": 0.65, + "learning_rate": 1.1484907357377378e-05, + "loss": 1.2489, + "step": 3094 + }, + { + "epoch": 0.65, + "learning_rate": 1.1472579538676883e-05, + "loss": 1.2354, + "step": 3095 + }, + { + "epoch": 0.65, + "learning_rate": 1.1460255678444598e-05, + "loss": 1.2463, + "step": 3096 + }, + { + "epoch": 0.65, + "learning_rate": 1.144793578240131e-05, + "loss": 1.2898, + "step": 3097 + }, + { + "epoch": 0.65, + "learning_rate": 1.1435619856265982e-05, + "loss": 1.2528, + "step": 3098 + }, + { + "epoch": 0.65, + "learning_rate": 1.1423307905755714e-05, + "loss": 1.2369, + "step": 3099 + }, + { + "epoch": 0.65, + "learning_rate": 1.1410999936585782e-05, + "loss": 1.244, + "step": 3100 + }, + { + "epoch": 0.65, + "learning_rate": 1.1398695954469598e-05, + "loss": 1.2379, + "step": 3101 + }, + { + "epoch": 0.65, + "learning_rate": 1.1386395965118715e-05, + "loss": 1.2566, + "step": 3102 + }, + { + "epoch": 0.65, + "learning_rate": 1.1374099974242867e-05, + "loss": 1.208, + "step": 3103 + }, + { + "epoch": 0.65, + "learning_rate": 1.13618079875499e-05, + "loss": 1.2772, + "step": 3104 + }, + { + "epoch": 0.65, + "learning_rate": 1.1349520010745802e-05, + "loss": 1.2572, + "step": 3105 + }, + { + "epoch": 0.65, + "learning_rate": 1.1337236049534726e-05, + "loss": 1.2293, + "step": 3106 + }, + { + "epoch": 0.65, + "learning_rate": 1.1324956109618927e-05, + "loss": 1.2289, + "step": 3107 + }, + { + "epoch": 0.65, + "learning_rate": 1.1312680196698817e-05, + "loss": 1.2304, + "step": 3108 + }, + { + "epoch": 0.65, + "learning_rate": 1.1300408316472944e-05, + "loss": 1.2079, + "step": 3109 + }, + { + "epoch": 0.65, + "learning_rate": 1.1288140474637953e-05, + "loss": 1.2966, + "step": 3110 + }, + { + "epoch": 0.65, + "learning_rate": 1.127587667688865e-05, + "loss": 1.2413, + "step": 3111 + }, + { + "epoch": 0.65, + "learning_rate": 1.126361692891794e-05, + "loss": 1.2613, + "step": 3112 + }, + { + "epoch": 0.65, + "learning_rate": 1.1251361236416845e-05, + "loss": 1.2146, + "step": 3113 + }, + { + "epoch": 0.65, + "learning_rate": 1.1239109605074527e-05, + "loss": 1.2915, + "step": 3114 + }, + { + "epoch": 0.66, + "learning_rate": 1.1226862040578244e-05, + "loss": 1.2645, + "step": 3115 + }, + { + "epoch": 0.66, + "learning_rate": 1.1214618548613379e-05, + "loss": 1.2736, + "step": 3116 + }, + { + "epoch": 0.66, + "learning_rate": 1.1202379134863412e-05, + "loss": 1.2165, + "step": 3117 + }, + { + "epoch": 0.66, + "learning_rate": 1.1190143805009934e-05, + "loss": 1.2914, + "step": 3118 + }, + { + "epoch": 0.66, + "learning_rate": 1.117791256473265e-05, + "loss": 1.2711, + "step": 3119 + }, + { + "epoch": 0.66, + "learning_rate": 1.1165685419709353e-05, + "loss": 1.2913, + "step": 3120 + }, + { + "epoch": 0.66, + "learning_rate": 1.1153462375615934e-05, + "loss": 1.2376, + "step": 3121 + }, + { + "epoch": 0.66, + "learning_rate": 1.1141243438126403e-05, + "loss": 1.2159, + "step": 3122 + }, + { + "epoch": 0.66, + "learning_rate": 1.1129028612912832e-05, + "loss": 1.2791, + "step": 3123 + }, + { + "epoch": 0.66, + "learning_rate": 1.1116817905645411e-05, + "loss": 1.2404, + "step": 3124 + }, + { + "epoch": 0.66, + "learning_rate": 1.1104611321992404e-05, + "loss": 1.1767, + "step": 3125 + }, + { + "epoch": 0.66, + "learning_rate": 1.1092408867620155e-05, + "loss": 1.2579, + "step": 3126 + }, + { + "epoch": 0.66, + "learning_rate": 1.1080210548193113e-05, + "loss": 1.2654, + "step": 3127 + }, + { + "epoch": 0.66, + "learning_rate": 1.1068016369373784e-05, + "loss": 1.2605, + "step": 3128 + }, + { + "epoch": 0.66, + "learning_rate": 1.1055826336822775e-05, + "loss": 1.2506, + "step": 3129 + }, + { + "epoch": 0.66, + "learning_rate": 1.1043640456198745e-05, + "loss": 1.2704, + "step": 3130 + }, + { + "epoch": 0.66, + "learning_rate": 1.1031458733158434e-05, + "loss": 1.2567, + "step": 3131 + }, + { + "epoch": 0.66, + "learning_rate": 1.101928117335666e-05, + "loss": 1.2427, + "step": 3132 + }, + { + "epoch": 0.66, + "learning_rate": 1.100710778244631e-05, + "loss": 1.3009, + "step": 3133 + }, + { + "epoch": 0.66, + "learning_rate": 1.0994938566078315e-05, + "loss": 1.333, + "step": 3134 + }, + { + "epoch": 0.66, + "learning_rate": 1.0982773529901696e-05, + "loss": 1.2514, + "step": 3135 + }, + { + "epoch": 0.66, + "learning_rate": 1.0970612679563501e-05, + "loss": 1.2851, + "step": 3136 + }, + { + "epoch": 0.66, + "learning_rate": 1.0958456020708875e-05, + "loss": 1.2252, + "step": 3137 + }, + { + "epoch": 0.66, + "learning_rate": 1.0946303558980981e-05, + "loss": 1.2286, + "step": 3138 + }, + { + "epoch": 0.66, + "learning_rate": 1.0934155300021048e-05, + "loss": 1.2222, + "step": 3139 + }, + { + "epoch": 0.66, + "learning_rate": 1.0922011249468362e-05, + "loss": 1.276, + "step": 3140 + }, + { + "epoch": 0.66, + "learning_rate": 1.0909871412960245e-05, + "loss": 1.2708, + "step": 3141 + }, + { + "epoch": 0.66, + "learning_rate": 1.0897735796132056e-05, + "loss": 1.2226, + "step": 3142 + }, + { + "epoch": 0.66, + "learning_rate": 1.0885604404617221e-05, + "loss": 1.2381, + "step": 3143 + }, + { + "epoch": 0.66, + "learning_rate": 1.087347724404717e-05, + "loss": 1.2114, + "step": 3144 + }, + { + "epoch": 0.66, + "learning_rate": 1.086135432005141e-05, + "loss": 1.2417, + "step": 3145 + }, + { + "epoch": 0.66, + "learning_rate": 1.0849235638257442e-05, + "loss": 1.2634, + "step": 3146 + }, + { + "epoch": 0.66, + "learning_rate": 1.0837121204290812e-05, + "loss": 1.2382, + "step": 3147 + }, + { + "epoch": 0.66, + "learning_rate": 1.0825011023775111e-05, + "loss": 1.2725, + "step": 3148 + }, + { + "epoch": 0.66, + "learning_rate": 1.0812905102331927e-05, + "loss": 1.2429, + "step": 3149 + }, + { + "epoch": 0.66, + "learning_rate": 1.0800803445580896e-05, + "loss": 1.2722, + "step": 3150 + }, + { + "epoch": 0.66, + "learning_rate": 1.078870605913966e-05, + "loss": 1.2428, + "step": 3151 + }, + { + "epoch": 0.66, + "learning_rate": 1.0776612948623874e-05, + "loss": 1.2101, + "step": 3152 + }, + { + "epoch": 0.66, + "learning_rate": 1.0764524119647228e-05, + "loss": 1.2547, + "step": 3153 + }, + { + "epoch": 0.66, + "learning_rate": 1.0752439577821398e-05, + "loss": 1.2091, + "step": 3154 + }, + { + "epoch": 0.66, + "learning_rate": 1.0740359328756105e-05, + "loss": 1.2598, + "step": 3155 + }, + { + "epoch": 0.66, + "learning_rate": 1.0728283378059036e-05, + "loss": 1.2172, + "step": 3156 + }, + { + "epoch": 0.66, + "learning_rate": 1.0716211731335922e-05, + "loss": 1.2211, + "step": 3157 + }, + { + "epoch": 0.66, + "learning_rate": 1.0704144394190458e-05, + "loss": 1.2192, + "step": 3158 + }, + { + "epoch": 0.66, + "learning_rate": 1.0692081372224378e-05, + "loss": 1.2634, + "step": 3159 + }, + { + "epoch": 0.66, + "learning_rate": 1.0680022671037376e-05, + "loss": 1.2721, + "step": 3160 + }, + { + "epoch": 0.66, + "learning_rate": 1.0667968296227169e-05, + "loss": 1.2578, + "step": 3161 + }, + { + "epoch": 0.67, + "learning_rate": 1.0655918253389452e-05, + "loss": 1.2648, + "step": 3162 + }, + { + "epoch": 0.67, + "learning_rate": 1.06438725481179e-05, + "loss": 1.2056, + "step": 3163 + }, + { + "epoch": 0.67, + "learning_rate": 1.06318311860042e-05, + "loss": 1.2825, + "step": 3164 + }, + { + "epoch": 0.67, + "learning_rate": 1.0619794172637995e-05, + "loss": 1.2496, + "step": 3165 + }, + { + "epoch": 0.67, + "learning_rate": 1.0607761513606935e-05, + "loss": 1.2619, + "step": 3166 + }, + { + "epoch": 0.67, + "learning_rate": 1.0595733214496633e-05, + "loss": 1.251, + "step": 3167 + }, + { + "epoch": 0.67, + "learning_rate": 1.0583709280890668e-05, + "loss": 1.2337, + "step": 3168 + }, + { + "epoch": 0.67, + "learning_rate": 1.0571689718370629e-05, + "loss": 1.2735, + "step": 3169 + }, + { + "epoch": 0.67, + "learning_rate": 1.0559674532516033e-05, + "loss": 1.2749, + "step": 3170 + }, + { + "epoch": 0.67, + "learning_rate": 1.0547663728904392e-05, + "loss": 1.2735, + "step": 3171 + }, + { + "epoch": 0.67, + "learning_rate": 1.0535657313111183e-05, + "loss": 1.2379, + "step": 3172 + }, + { + "epoch": 0.67, + "learning_rate": 1.0523655290709825e-05, + "loss": 1.2867, + "step": 3173 + }, + { + "epoch": 0.67, + "learning_rate": 1.0511657667271731e-05, + "loss": 1.2593, + "step": 3174 + }, + { + "epoch": 0.67, + "learning_rate": 1.0499664448366245e-05, + "loss": 1.246, + "step": 3175 + }, + { + "epoch": 0.67, + "learning_rate": 1.0487675639560664e-05, + "loss": 1.3067, + "step": 3176 + }, + { + "epoch": 0.67, + "learning_rate": 1.0475691246420267e-05, + "loss": 1.2627, + "step": 3177 + }, + { + "epoch": 0.67, + "learning_rate": 1.0463711274508253e-05, + "loss": 1.2061, + "step": 3178 + }, + { + "epoch": 0.67, + "learning_rate": 1.045173572938579e-05, + "loss": 1.2407, + "step": 3179 + }, + { + "epoch": 0.67, + "learning_rate": 1.0439764616611972e-05, + "loss": 1.2155, + "step": 3180 + }, + { + "epoch": 0.67, + "learning_rate": 1.0427797941743854e-05, + "loss": 1.2505, + "step": 3181 + }, + { + "epoch": 0.67, + "learning_rate": 1.041583571033641e-05, + "loss": 1.2293, + "step": 3182 + }, + { + "epoch": 0.67, + "learning_rate": 1.0403877927942582e-05, + "loss": 1.2287, + "step": 3183 + }, + { + "epoch": 0.67, + "learning_rate": 1.0391924600113211e-05, + "loss": 1.1778, + "step": 3184 + }, + { + "epoch": 0.67, + "learning_rate": 1.0379975732397096e-05, + "loss": 1.2483, + "step": 3185 + }, + { + "epoch": 0.67, + "learning_rate": 1.0368031330340948e-05, + "loss": 1.1827, + "step": 3186 + }, + { + "epoch": 0.67, + "learning_rate": 1.0356091399489431e-05, + "loss": 1.2479, + "step": 3187 + }, + { + "epoch": 0.67, + "learning_rate": 1.0344155945385106e-05, + "loss": 1.2303, + "step": 3188 + }, + { + "epoch": 0.67, + "learning_rate": 1.0332224973568458e-05, + "loss": 1.1771, + "step": 3189 + }, + { + "epoch": 0.67, + "learning_rate": 1.0320298489577913e-05, + "loss": 1.2381, + "step": 3190 + }, + { + "epoch": 0.67, + "learning_rate": 1.03083764989498e-05, + "loss": 1.2529, + "step": 3191 + }, + { + "epoch": 0.67, + "learning_rate": 1.0296459007218345e-05, + "loss": 1.2194, + "step": 3192 + }, + { + "epoch": 0.67, + "learning_rate": 1.0284546019915727e-05, + "loss": 1.2178, + "step": 3193 + }, + { + "epoch": 0.67, + "learning_rate": 1.0272637542571988e-05, + "loss": 1.228, + "step": 3194 + }, + { + "epoch": 0.67, + "learning_rate": 1.026073358071512e-05, + "loss": 1.2682, + "step": 3195 + }, + { + "epoch": 0.67, + "learning_rate": 1.0248834139870985e-05, + "loss": 1.2121, + "step": 3196 + }, + { + "epoch": 0.67, + "learning_rate": 1.0236939225563351e-05, + "loss": 1.2508, + "step": 3197 + }, + { + "epoch": 0.67, + "learning_rate": 1.0225048843313914e-05, + "loss": 1.216, + "step": 3198 + }, + { + "epoch": 0.67, + "learning_rate": 1.021316299864223e-05, + "loss": 1.1624, + "step": 3199 + }, + { + "epoch": 0.67, + "learning_rate": 1.0201281697065757e-05, + "loss": 1.2211, + "step": 3200 + }, + { + "epoch": 0.67, + "learning_rate": 1.0189404944099867e-05, + "loss": 1.2744, + "step": 3201 + }, + { + "epoch": 0.67, + "learning_rate": 1.017753274525779e-05, + "loss": 1.215, + "step": 3202 + }, + { + "epoch": 0.67, + "learning_rate": 1.016566510605067e-05, + "loss": 1.217, + "step": 3203 + }, + { + "epoch": 0.67, + "learning_rate": 1.0153802031987504e-05, + "loss": 1.25, + "step": 3204 + }, + { + "epoch": 0.67, + "learning_rate": 1.0141943528575205e-05, + "loss": 1.2136, + "step": 3205 + }, + { + "epoch": 0.67, + "learning_rate": 1.0130089601318525e-05, + "loss": 1.2514, + "step": 3206 + }, + { + "epoch": 0.67, + "learning_rate": 1.0118240255720128e-05, + "loss": 1.2354, + "step": 3207 + }, + { + "epoch": 0.67, + "learning_rate": 1.0106395497280524e-05, + "loss": 1.2434, + "step": 3208 + }, + { + "epoch": 0.67, + "learning_rate": 1.0094555331498118e-05, + "loss": 1.2592, + "step": 3209 + }, + { + "epoch": 0.68, + "learning_rate": 1.0082719763869153e-05, + "loss": 1.1914, + "step": 3210 + }, + { + "epoch": 0.68, + "learning_rate": 1.0070888799887772e-05, + "loss": 1.2514, + "step": 3211 + }, + { + "epoch": 0.68, + "learning_rate": 1.0059062445045957e-05, + "loss": 1.2839, + "step": 3212 + }, + { + "epoch": 0.68, + "learning_rate": 1.0047240704833544e-05, + "loss": 1.2125, + "step": 3213 + }, + { + "epoch": 0.68, + "learning_rate": 1.0035423584738262e-05, + "loss": 1.2671, + "step": 3214 + }, + { + "epoch": 0.68, + "learning_rate": 1.0023611090245653e-05, + "loss": 1.2732, + "step": 3215 + }, + { + "epoch": 0.68, + "learning_rate": 1.0011803226839148e-05, + "loss": 1.2836, + "step": 3216 + }, + { + "epoch": 0.68, + "learning_rate": 1.0000000000000006e-05, + "loss": 1.2816, + "step": 3217 + }, + { + "epoch": 0.68, + "learning_rate": 9.988201415207327e-06, + "loss": 1.2156, + "step": 3218 + }, + { + "epoch": 0.68, + "learning_rate": 9.976407477938092e-06, + "loss": 1.2123, + "step": 3219 + }, + { + "epoch": 0.68, + "learning_rate": 9.96461819366709e-06, + "loss": 1.2187, + "step": 3220 + }, + { + "epoch": 0.68, + "learning_rate": 9.952833567866954e-06, + "loss": 1.257, + "step": 3221 + }, + { + "epoch": 0.68, + "learning_rate": 9.941053606008176e-06, + "loss": 1.2064, + "step": 3222 + }, + { + "epoch": 0.68, + "learning_rate": 9.929278313559054e-06, + "loss": 1.2562, + "step": 3223 + }, + { + "epoch": 0.68, + "learning_rate": 9.917507695985752e-06, + "loss": 1.2479, + "step": 3224 + }, + { + "epoch": 0.68, + "learning_rate": 9.905741758752234e-06, + "loss": 1.2899, + "step": 3225 + }, + { + "epoch": 0.68, + "learning_rate": 9.893980507320295e-06, + "loss": 1.2659, + "step": 3226 + }, + { + "epoch": 0.68, + "learning_rate": 9.882223947149583e-06, + "loss": 1.2383, + "step": 3227 + }, + { + "epoch": 0.68, + "learning_rate": 9.870472083697526e-06, + "loss": 1.2468, + "step": 3228 + }, + { + "epoch": 0.68, + "learning_rate": 9.858724922419413e-06, + "loss": 1.2514, + "step": 3229 + }, + { + "epoch": 0.68, + "learning_rate": 9.846982468768316e-06, + "loss": 1.2496, + "step": 3230 + }, + { + "epoch": 0.68, + "learning_rate": 9.83524472819515e-06, + "loss": 1.2408, + "step": 3231 + }, + { + "epoch": 0.68, + "learning_rate": 9.823511706148612e-06, + "loss": 1.2441, + "step": 3232 + }, + { + "epoch": 0.68, + "learning_rate": 9.811783408075244e-06, + "loss": 1.2472, + "step": 3233 + }, + { + "epoch": 0.68, + "learning_rate": 9.800059839419358e-06, + "loss": 1.2464, + "step": 3234 + }, + { + "epoch": 0.68, + "learning_rate": 9.788341005623107e-06, + "loss": 1.2239, + "step": 3235 + }, + { + "epoch": 0.68, + "learning_rate": 9.776626912126413e-06, + "loss": 1.2192, + "step": 3236 + }, + { + "epoch": 0.68, + "learning_rate": 9.764917564367025e-06, + "loss": 1.2329, + "step": 3237 + }, + { + "epoch": 0.68, + "learning_rate": 9.753212967780472e-06, + "loss": 1.2609, + "step": 3238 + }, + { + "epoch": 0.68, + "learning_rate": 9.741513127800072e-06, + "loss": 1.2723, + "step": 3239 + }, + { + "epoch": 0.68, + "learning_rate": 9.729818049856963e-06, + "loss": 1.2593, + "step": 3240 + }, + { + "epoch": 0.68, + "learning_rate": 9.718127739380043e-06, + "loss": 1.2197, + "step": 3241 + }, + { + "epoch": 0.68, + "learning_rate": 9.706442201796007e-06, + "loss": 1.2573, + "step": 3242 + }, + { + "epoch": 0.68, + "learning_rate": 9.694761442529345e-06, + "loss": 1.196, + "step": 3243 + }, + { + "epoch": 0.68, + "learning_rate": 9.683085467002306e-06, + "loss": 1.2149, + "step": 3244 + }, + { + "epoch": 0.68, + "learning_rate": 9.67141428063495e-06, + "loss": 1.2241, + "step": 3245 + }, + { + "epoch": 0.68, + "learning_rate": 9.659747888845087e-06, + "loss": 1.2784, + "step": 3246 + }, + { + "epoch": 0.68, + "learning_rate": 9.648086297048302e-06, + "loss": 1.2894, + "step": 3247 + }, + { + "epoch": 0.68, + "learning_rate": 9.636429510657974e-06, + "loss": 1.2243, + "step": 3248 + }, + { + "epoch": 0.68, + "learning_rate": 9.624777535085233e-06, + "loss": 1.1729, + "step": 3249 + }, + { + "epoch": 0.68, + "learning_rate": 9.61313037573897e-06, + "loss": 1.2249, + "step": 3250 + }, + { + "epoch": 0.68, + "learning_rate": 9.601488038025869e-06, + "loss": 1.2333, + "step": 3251 + }, + { + "epoch": 0.68, + "learning_rate": 9.589850527350337e-06, + "loss": 1.2303, + "step": 3252 + }, + { + "epoch": 0.68, + "learning_rate": 9.578217849114579e-06, + "loss": 1.2754, + "step": 3253 + }, + { + "epoch": 0.68, + "learning_rate": 9.566590008718524e-06, + "loss": 1.2453, + "step": 3254 + }, + { + "epoch": 0.68, + "learning_rate": 9.554967011559874e-06, + "loss": 1.1803, + "step": 3255 + }, + { + "epoch": 0.68, + "learning_rate": 9.54334886303409e-06, + "loss": 1.2374, + "step": 3256 + }, + { + "epoch": 0.69, + "learning_rate": 9.53173556853435e-06, + "loss": 1.2788, + "step": 3257 + }, + { + "epoch": 0.69, + "learning_rate": 9.520127133451619e-06, + "loss": 1.1753, + "step": 3258 + }, + { + "epoch": 0.69, + "learning_rate": 9.508523563174578e-06, + "loss": 1.2168, + "step": 3259 + }, + { + "epoch": 0.69, + "learning_rate": 9.496924863089652e-06, + "loss": 1.2706, + "step": 3260 + }, + { + "epoch": 0.69, + "learning_rate": 9.485331038581021e-06, + "loss": 1.1991, + "step": 3261 + }, + { + "epoch": 0.69, + "learning_rate": 9.473742095030588e-06, + "loss": 1.2774, + "step": 3262 + }, + { + "epoch": 0.69, + "learning_rate": 9.46215803781799e-06, + "loss": 1.256, + "step": 3263 + }, + { + "epoch": 0.69, + "learning_rate": 9.450578872320613e-06, + "loss": 1.2213, + "step": 3264 + }, + { + "epoch": 0.69, + "learning_rate": 9.439004603913542e-06, + "loss": 1.2804, + "step": 3265 + }, + { + "epoch": 0.69, + "learning_rate": 9.427435237969624e-06, + "loss": 1.2548, + "step": 3266 + }, + { + "epoch": 0.69, + "learning_rate": 9.415870779859405e-06, + "loss": 1.2665, + "step": 3267 + }, + { + "epoch": 0.69, + "learning_rate": 9.404311234951148e-06, + "loss": 1.2561, + "step": 3268 + }, + { + "epoch": 0.69, + "learning_rate": 9.392756608610871e-06, + "loss": 1.239, + "step": 3269 + }, + { + "epoch": 0.69, + "learning_rate": 9.381206906202268e-06, + "loss": 1.2774, + "step": 3270 + }, + { + "epoch": 0.69, + "learning_rate": 9.369662133086768e-06, + "loss": 1.2445, + "step": 3271 + }, + { + "epoch": 0.69, + "learning_rate": 9.358122294623514e-06, + "loss": 1.2767, + "step": 3272 + }, + { + "epoch": 0.69, + "learning_rate": 9.34658739616934e-06, + "loss": 1.2274, + "step": 3273 + }, + { + "epoch": 0.69, + "learning_rate": 9.335057443078817e-06, + "loss": 1.2329, + "step": 3274 + }, + { + "epoch": 0.69, + "learning_rate": 9.323532440704196e-06, + "loss": 1.2633, + "step": 3275 + }, + { + "epoch": 0.69, + "learning_rate": 9.312012394395423e-06, + "loss": 1.2442, + "step": 3276 + }, + { + "epoch": 0.69, + "learning_rate": 9.300497309500176e-06, + "loss": 1.2476, + "step": 3277 + }, + { + "epoch": 0.69, + "learning_rate": 9.288987191363799e-06, + "loss": 1.2524, + "step": 3278 + }, + { + "epoch": 0.69, + "learning_rate": 9.277482045329344e-06, + "loss": 1.2107, + "step": 3279 + }, + { + "epoch": 0.69, + "learning_rate": 9.265981876737566e-06, + "loss": 1.235, + "step": 3280 + }, + { + "epoch": 0.69, + "learning_rate": 9.254486690926878e-06, + "loss": 1.2543, + "step": 3281 + }, + { + "epoch": 0.69, + "learning_rate": 9.242996493233414e-06, + "loss": 1.281, + "step": 3282 + }, + { + "epoch": 0.69, + "learning_rate": 9.23151128899097e-06, + "loss": 1.2388, + "step": 3283 + }, + { + "epoch": 0.69, + "learning_rate": 9.220031083531026e-06, + "loss": 1.2788, + "step": 3284 + }, + { + "epoch": 0.69, + "learning_rate": 9.208555882182762e-06, + "loss": 1.2302, + "step": 3285 + }, + { + "epoch": 0.69, + "learning_rate": 9.197085690273e-06, + "loss": 1.2492, + "step": 3286 + }, + { + "epoch": 0.69, + "learning_rate": 9.185620513126275e-06, + "loss": 1.2273, + "step": 3287 + }, + { + "epoch": 0.69, + "learning_rate": 9.174160356064765e-06, + "loss": 1.2494, + "step": 3288 + }, + { + "epoch": 0.69, + "learning_rate": 9.162705224408326e-06, + "loss": 1.2193, + "step": 3289 + }, + { + "epoch": 0.69, + "learning_rate": 9.151255123474493e-06, + "loss": 1.275, + "step": 3290 + }, + { + "epoch": 0.69, + "learning_rate": 9.139810058578451e-06, + "loss": 1.2566, + "step": 3291 + }, + { + "epoch": 0.69, + "learning_rate": 9.128370035033046e-06, + "loss": 1.2343, + "step": 3292 + }, + { + "epoch": 0.69, + "learning_rate": 9.116935058148801e-06, + "loss": 1.2535, + "step": 3293 + }, + { + "epoch": 0.69, + "learning_rate": 9.105505133233876e-06, + "loss": 1.2429, + "step": 3294 + }, + { + "epoch": 0.69, + "learning_rate": 9.094080265594108e-06, + "loss": 1.2582, + "step": 3295 + }, + { + "epoch": 0.69, + "learning_rate": 9.082660460532961e-06, + "loss": 1.2402, + "step": 3296 + }, + { + "epoch": 0.69, + "learning_rate": 9.071245723351563e-06, + "loss": 1.2902, + "step": 3297 + }, + { + "epoch": 0.69, + "learning_rate": 9.059836059348696e-06, + "loss": 1.2472, + "step": 3298 + }, + { + "epoch": 0.69, + "learning_rate": 9.048431473820776e-06, + "loss": 1.2695, + "step": 3299 + }, + { + "epoch": 0.69, + "learning_rate": 9.037031972061854e-06, + "loss": 1.2196, + "step": 3300 + }, + { + "epoch": 0.69, + "learning_rate": 9.02563755936365e-06, + "loss": 1.2352, + "step": 3301 + }, + { + "epoch": 0.69, + "learning_rate": 9.014248241015484e-06, + "loss": 1.2558, + "step": 3302 + }, + { + "epoch": 0.69, + "learning_rate": 9.00286402230434e-06, + "loss": 1.2743, + "step": 3303 + }, + { + "epoch": 0.69, + "learning_rate": 8.991484908514835e-06, + "loss": 1.2453, + "step": 3304 + }, + { + "epoch": 0.7, + "learning_rate": 8.980110904929189e-06, + "loss": 1.2606, + "step": 3305 + }, + { + "epoch": 0.7, + "learning_rate": 8.968742016827283e-06, + "loss": 1.2222, + "step": 3306 + }, + { + "epoch": 0.7, + "learning_rate": 8.957378249486592e-06, + "loss": 1.2485, + "step": 3307 + }, + { + "epoch": 0.7, + "learning_rate": 8.946019608182245e-06, + "loss": 1.3003, + "step": 3308 + }, + { + "epoch": 0.7, + "learning_rate": 8.93466609818697e-06, + "loss": 1.2299, + "step": 3309 + }, + { + "epoch": 0.7, + "learning_rate": 8.92331772477111e-06, + "loss": 1.2602, + "step": 3310 + }, + { + "epoch": 0.7, + "learning_rate": 8.91197449320265e-06, + "loss": 1.2744, + "step": 3311 + }, + { + "epoch": 0.7, + "learning_rate": 8.900636408747156e-06, + "loss": 1.2881, + "step": 3312 + }, + { + "epoch": 0.7, + "learning_rate": 8.889303476667823e-06, + "loss": 1.2524, + "step": 3313 + }, + { + "epoch": 0.7, + "learning_rate": 8.877975702225457e-06, + "loss": 1.2903, + "step": 3314 + }, + { + "epoch": 0.7, + "learning_rate": 8.866653090678452e-06, + "loss": 1.257, + "step": 3315 + }, + { + "epoch": 0.7, + "learning_rate": 8.855335647282833e-06, + "loss": 1.2713, + "step": 3316 + }, + { + "epoch": 0.7, + "learning_rate": 8.844023377292198e-06, + "loss": 1.2491, + "step": 3317 + }, + { + "epoch": 0.7, + "learning_rate": 8.832716285957754e-06, + "loss": 1.2851, + "step": 3318 + }, + { + "epoch": 0.7, + "learning_rate": 8.821414378528314e-06, + "loss": 1.2564, + "step": 3319 + }, + { + "epoch": 0.7, + "learning_rate": 8.810117660250275e-06, + "loss": 1.2711, + "step": 3320 + }, + { + "epoch": 0.7, + "learning_rate": 8.798826136367613e-06, + "loss": 1.2451, + "step": 3321 + }, + { + "epoch": 0.7, + "learning_rate": 8.787539812121924e-06, + "loss": 1.2593, + "step": 3322 + }, + { + "epoch": 0.7, + "learning_rate": 8.776258692752355e-06, + "loss": 1.2359, + "step": 3323 + }, + { + "epoch": 0.7, + "learning_rate": 8.76498278349567e-06, + "loss": 1.267, + "step": 3324 + }, + { + "epoch": 0.7, + "learning_rate": 8.753712089586184e-06, + "loss": 1.2868, + "step": 3325 + }, + { + "epoch": 0.7, + "learning_rate": 8.74244661625582e-06, + "loss": 1.2327, + "step": 3326 + }, + { + "epoch": 0.7, + "learning_rate": 8.731186368734049e-06, + "loss": 1.2488, + "step": 3327 + }, + { + "epoch": 0.7, + "learning_rate": 8.719931352247944e-06, + "loss": 1.2485, + "step": 3328 + }, + { + "epoch": 0.7, + "learning_rate": 8.708681572022122e-06, + "loss": 1.2541, + "step": 3329 + }, + { + "epoch": 0.7, + "learning_rate": 8.697437033278797e-06, + "loss": 1.2607, + "step": 3330 + }, + { + "epoch": 0.7, + "learning_rate": 8.686197741237722e-06, + "loss": 1.2404, + "step": 3331 + }, + { + "epoch": 0.7, + "learning_rate": 8.674963701116243e-06, + "loss": 1.2154, + "step": 3332 + }, + { + "epoch": 0.7, + "learning_rate": 8.663734918129247e-06, + "loss": 1.2064, + "step": 3333 + }, + { + "epoch": 0.7, + "learning_rate": 8.652511397489181e-06, + "loss": 1.2387, + "step": 3334 + }, + { + "epoch": 0.7, + "learning_rate": 8.641293144406067e-06, + "loss": 1.2578, + "step": 3335 + }, + { + "epoch": 0.7, + "learning_rate": 8.630080164087456e-06, + "loss": 1.2699, + "step": 3336 + }, + { + "epoch": 0.7, + "learning_rate": 8.618872461738483e-06, + "loss": 1.212, + "step": 3337 + }, + { + "epoch": 0.7, + "learning_rate": 8.607670042561807e-06, + "loss": 1.26, + "step": 3338 + }, + { + "epoch": 0.7, + "learning_rate": 8.596472911757633e-06, + "loss": 1.2571, + "step": 3339 + }, + { + "epoch": 0.7, + "learning_rate": 8.58528107452374e-06, + "loss": 1.2404, + "step": 3340 + }, + { + "epoch": 0.7, + "learning_rate": 8.574094536055423e-06, + "loss": 1.261, + "step": 3341 + }, + { + "epoch": 0.7, + "learning_rate": 8.562913301545513e-06, + "loss": 1.2811, + "step": 3342 + }, + { + "epoch": 0.7, + "learning_rate": 8.55173737618441e-06, + "loss": 1.2974, + "step": 3343 + }, + { + "epoch": 0.7, + "learning_rate": 8.540566765160016e-06, + "loss": 1.2205, + "step": 3344 + }, + { + "epoch": 0.7, + "learning_rate": 8.529401473657795e-06, + "loss": 1.2413, + "step": 3345 + }, + { + "epoch": 0.7, + "learning_rate": 8.518241506860719e-06, + "loss": 1.2487, + "step": 3346 + }, + { + "epoch": 0.7, + "learning_rate": 8.507086869949287e-06, + "loss": 1.2469, + "step": 3347 + }, + { + "epoch": 0.7, + "learning_rate": 8.495937568101551e-06, + "loss": 1.2834, + "step": 3348 + }, + { + "epoch": 0.7, + "learning_rate": 8.484793606493054e-06, + "loss": 1.2322, + "step": 3349 + }, + { + "epoch": 0.7, + "learning_rate": 8.473654990296887e-06, + "loss": 1.2866, + "step": 3350 + }, + { + "epoch": 0.7, + "learning_rate": 8.462521724683637e-06, + "loss": 1.202, + "step": 3351 + }, + { + "epoch": 0.71, + "learning_rate": 8.451393814821427e-06, + "loss": 1.2378, + "step": 3352 + }, + { + "epoch": 0.71, + "learning_rate": 8.440271265875875e-06, + "loss": 1.2491, + "step": 3353 + }, + { + "epoch": 0.71, + "learning_rate": 8.42915408301013e-06, + "loss": 1.214, + "step": 3354 + }, + { + "epoch": 0.71, + "learning_rate": 8.418042271384828e-06, + "loss": 1.2479, + "step": 3355 + }, + { + "epoch": 0.71, + "learning_rate": 8.406935836158138e-06, + "loss": 1.209, + "step": 3356 + }, + { + "epoch": 0.71, + "learning_rate": 8.39583478248571e-06, + "loss": 1.235, + "step": 3357 + }, + { + "epoch": 0.71, + "learning_rate": 8.3847391155207e-06, + "loss": 1.27, + "step": 3358 + }, + { + "epoch": 0.71, + "learning_rate": 8.373648840413781e-06, + "loss": 1.2391, + "step": 3359 + }, + { + "epoch": 0.71, + "learning_rate": 8.362563962313095e-06, + "loss": 1.2589, + "step": 3360 + }, + { + "epoch": 0.71, + "learning_rate": 8.35148448636431e-06, + "loss": 1.1859, + "step": 3361 + }, + { + "epoch": 0.71, + "learning_rate": 8.340410417710562e-06, + "loss": 1.2299, + "step": 3362 + }, + { + "epoch": 0.71, + "learning_rate": 8.32934176149248e-06, + "loss": 1.2465, + "step": 3363 + }, + { + "epoch": 0.71, + "learning_rate": 8.318278522848198e-06, + "loss": 1.2249, + "step": 3364 + }, + { + "epoch": 0.71, + "learning_rate": 8.307220706913308e-06, + "loss": 1.2563, + "step": 3365 + }, + { + "epoch": 0.71, + "learning_rate": 8.296168318820914e-06, + "loss": 1.2431, + "step": 3366 + }, + { + "epoch": 0.71, + "learning_rate": 8.28512136370158e-06, + "loss": 1.2983, + "step": 3367 + }, + { + "epoch": 0.71, + "learning_rate": 8.274079846683346e-06, + "loss": 1.257, + "step": 3368 + }, + { + "epoch": 0.71, + "learning_rate": 8.263043772891752e-06, + "loss": 1.2664, + "step": 3369 + }, + { + "epoch": 0.71, + "learning_rate": 8.252013147449785e-06, + "loss": 1.2095, + "step": 3370 + }, + { + "epoch": 0.71, + "learning_rate": 8.240987975477903e-06, + "loss": 1.2258, + "step": 3371 + }, + { + "epoch": 0.71, + "learning_rate": 8.229968262094064e-06, + "loss": 1.2169, + "step": 3372 + }, + { + "epoch": 0.71, + "learning_rate": 8.218954012413647e-06, + "loss": 1.2153, + "step": 3373 + }, + { + "epoch": 0.71, + "learning_rate": 8.207945231549539e-06, + "loss": 1.2195, + "step": 3374 + }, + { + "epoch": 0.71, + "learning_rate": 8.19694192461205e-06, + "loss": 1.2949, + "step": 3375 + }, + { + "epoch": 0.71, + "learning_rate": 8.185944096708982e-06, + "loss": 1.23, + "step": 3376 + }, + { + "epoch": 0.71, + "learning_rate": 8.17495175294556e-06, + "loss": 1.2486, + "step": 3377 + }, + { + "epoch": 0.71, + "learning_rate": 8.1639648984245e-06, + "loss": 1.2274, + "step": 3378 + }, + { + "epoch": 0.71, + "learning_rate": 8.152983538245933e-06, + "loss": 1.2346, + "step": 3379 + }, + { + "epoch": 0.71, + "learning_rate": 8.142007677507475e-06, + "loss": 1.284, + "step": 3380 + }, + { + "epoch": 0.71, + "learning_rate": 8.131037321304154e-06, + "loss": 1.2561, + "step": 3381 + }, + { + "epoch": 0.71, + "learning_rate": 8.120072474728476e-06, + "loss": 1.2514, + "step": 3382 + }, + { + "epoch": 0.71, + "learning_rate": 8.10911314287037e-06, + "loss": 1.2614, + "step": 3383 + }, + { + "epoch": 0.71, + "learning_rate": 8.098159330817192e-06, + "loss": 1.2331, + "step": 3384 + }, + { + "epoch": 0.71, + "learning_rate": 8.087211043653777e-06, + "loss": 1.2503, + "step": 3385 + }, + { + "epoch": 0.71, + "learning_rate": 8.076268286462352e-06, + "loss": 1.2748, + "step": 3386 + }, + { + "epoch": 0.71, + "learning_rate": 8.065331064322612e-06, + "loss": 1.2431, + "step": 3387 + }, + { + "epoch": 0.71, + "learning_rate": 8.054399382311657e-06, + "loss": 1.1891, + "step": 3388 + }, + { + "epoch": 0.71, + "learning_rate": 8.043473245504017e-06, + "loss": 1.2109, + "step": 3389 + }, + { + "epoch": 0.71, + "learning_rate": 8.032552658971672e-06, + "loss": 1.2667, + "step": 3390 + }, + { + "epoch": 0.71, + "learning_rate": 8.021637627784e-06, + "loss": 1.2332, + "step": 3391 + }, + { + "epoch": 0.71, + "learning_rate": 8.010728157007805e-06, + "loss": 1.183, + "step": 3392 + }, + { + "epoch": 0.71, + "learning_rate": 7.999824251707324e-06, + "loss": 1.2525, + "step": 3393 + }, + { + "epoch": 0.71, + "learning_rate": 7.98892591694419e-06, + "loss": 1.256, + "step": 3394 + }, + { + "epoch": 0.71, + "learning_rate": 7.978033157777473e-06, + "loss": 1.2587, + "step": 3395 + }, + { + "epoch": 0.71, + "learning_rate": 7.967145979263637e-06, + "loss": 1.2686, + "step": 3396 + }, + { + "epoch": 0.71, + "learning_rate": 7.956264386456551e-06, + "loss": 1.2134, + "step": 3397 + }, + { + "epoch": 0.71, + "learning_rate": 7.945388384407518e-06, + "loss": 1.244, + "step": 3398 + }, + { + "epoch": 0.71, + "learning_rate": 7.934517978165211e-06, + "loss": 1.2408, + "step": 3399 + }, + { + "epoch": 0.72, + "learning_rate": 7.92365317277574e-06, + "loss": 1.2673, + "step": 3400 + }, + { + "epoch": 0.72, + "learning_rate": 7.912793973282584e-06, + "loss": 1.2307, + "step": 3401 + }, + { + "epoch": 0.72, + "learning_rate": 7.90194038472665e-06, + "loss": 1.256, + "step": 3402 + }, + { + "epoch": 0.72, + "learning_rate": 7.891092412146204e-06, + "loss": 1.2364, + "step": 3403 + }, + { + "epoch": 0.72, + "learning_rate": 7.880250060576943e-06, + "loss": 1.2482, + "step": 3404 + }, + { + "epoch": 0.72, + "learning_rate": 7.869413335051926e-06, + "loss": 1.2059, + "step": 3405 + }, + { + "epoch": 0.72, + "learning_rate": 7.85858224060162e-06, + "loss": 1.2578, + "step": 3406 + }, + { + "epoch": 0.72, + "learning_rate": 7.847756782253864e-06, + "loss": 1.1665, + "step": 3407 + }, + { + "epoch": 0.72, + "learning_rate": 7.836936965033881e-06, + "loss": 1.2442, + "step": 3408 + }, + { + "epoch": 0.72, + "learning_rate": 7.826122793964293e-06, + "loss": 1.2477, + "step": 3409 + }, + { + "epoch": 0.72, + "learning_rate": 7.815314274065074e-06, + "loss": 1.2205, + "step": 3410 + }, + { + "epoch": 0.72, + "learning_rate": 7.804511410353603e-06, + "loss": 1.2614, + "step": 3411 + }, + { + "epoch": 0.72, + "learning_rate": 7.793714207844616e-06, + "loss": 1.2646, + "step": 3412 + }, + { + "epoch": 0.72, + "learning_rate": 7.782922671550213e-06, + "loss": 1.2035, + "step": 3413 + }, + { + "epoch": 0.72, + "learning_rate": 7.772136806479891e-06, + "loss": 1.1502, + "step": 3414 + }, + { + "epoch": 0.72, + "learning_rate": 7.761356617640485e-06, + "loss": 1.2283, + "step": 3415 + }, + { + "epoch": 0.72, + "learning_rate": 7.750582110036225e-06, + "loss": 1.2111, + "step": 3416 + }, + { + "epoch": 0.72, + "learning_rate": 7.739813288668677e-06, + "loss": 1.257, + "step": 3417 + }, + { + "epoch": 0.72, + "learning_rate": 7.72905015853677e-06, + "loss": 1.2329, + "step": 3418 + }, + { + "epoch": 0.72, + "learning_rate": 7.718292724636815e-06, + "loss": 1.2531, + "step": 3419 + }, + { + "epoch": 0.72, + "learning_rate": 7.70754099196246e-06, + "loss": 1.2305, + "step": 3420 + }, + { + "epoch": 0.72, + "learning_rate": 7.696794965504695e-06, + "loss": 1.2241, + "step": 3421 + }, + { + "epoch": 0.72, + "learning_rate": 7.686054650251893e-06, + "loss": 1.2656, + "step": 3422 + }, + { + "epoch": 0.72, + "learning_rate": 7.675320051189746e-06, + "loss": 1.1752, + "step": 3423 + }, + { + "epoch": 0.72, + "learning_rate": 7.664591173301315e-06, + "loss": 1.2414, + "step": 3424 + }, + { + "epoch": 0.72, + "learning_rate": 7.65386802156698e-06, + "loss": 1.2726, + "step": 3425 + }, + { + "epoch": 0.72, + "learning_rate": 7.64315060096449e-06, + "loss": 1.2527, + "step": 3426 + }, + { + "epoch": 0.72, + "learning_rate": 7.632438916468928e-06, + "loss": 1.2549, + "step": 3427 + }, + { + "epoch": 0.72, + "learning_rate": 7.621732973052696e-06, + "loss": 1.1833, + "step": 3428 + }, + { + "epoch": 0.72, + "learning_rate": 7.611032775685541e-06, + "loss": 1.2045, + "step": 3429 + }, + { + "epoch": 0.72, + "learning_rate": 7.600338329334554e-06, + "loss": 1.2253, + "step": 3430 + }, + { + "epoch": 0.72, + "learning_rate": 7.5896496389641336e-06, + "loss": 1.2404, + "step": 3431 + }, + { + "epoch": 0.72, + "learning_rate": 7.5789667095360355e-06, + "loss": 1.2634, + "step": 3432 + }, + { + "epoch": 0.72, + "learning_rate": 7.568289546009316e-06, + "loss": 1.2626, + "step": 3433 + }, + { + "epoch": 0.72, + "learning_rate": 7.557618153340358e-06, + "loss": 1.2234, + "step": 3434 + }, + { + "epoch": 0.72, + "learning_rate": 7.546952536482888e-06, + "loss": 1.2331, + "step": 3435 + }, + { + "epoch": 0.72, + "learning_rate": 7.536292700387924e-06, + "loss": 1.2757, + "step": 3436 + }, + { + "epoch": 0.72, + "learning_rate": 7.5256386500038055e-06, + "loss": 1.1817, + "step": 3437 + }, + { + "epoch": 0.72, + "learning_rate": 7.5149903902762066e-06, + "loss": 1.2456, + "step": 3438 + }, + { + "epoch": 0.72, + "learning_rate": 7.504347926148086e-06, + "loss": 1.2407, + "step": 3439 + }, + { + "epoch": 0.72, + "learning_rate": 7.4937112625597375e-06, + "loss": 1.2644, + "step": 3440 + }, + { + "epoch": 0.72, + "learning_rate": 7.483080404448744e-06, + "loss": 1.2944, + "step": 3441 + }, + { + "epoch": 0.72, + "learning_rate": 7.472455356749992e-06, + "loss": 1.2336, + "step": 3442 + }, + { + "epoch": 0.72, + "learning_rate": 7.461836124395692e-06, + "loss": 1.2524, + "step": 3443 + }, + { + "epoch": 0.72, + "learning_rate": 7.451222712315325e-06, + "loss": 1.2425, + "step": 3444 + }, + { + "epoch": 0.72, + "learning_rate": 7.440615125435702e-06, + "loss": 1.2094, + "step": 3445 + }, + { + "epoch": 0.72, + "learning_rate": 7.430013368680908e-06, + "loss": 1.2546, + "step": 3446 + }, + { + "epoch": 0.73, + "learning_rate": 7.419417446972319e-06, + "loss": 1.255, + "step": 3447 + }, + { + "epoch": 0.73, + "learning_rate": 7.408827365228625e-06, + "loss": 1.1557, + "step": 3448 + }, + { + "epoch": 0.73, + "learning_rate": 7.3982431283657805e-06, + "loss": 1.2077, + "step": 3449 + }, + { + "epoch": 0.73, + "learning_rate": 7.38766474129704e-06, + "loss": 1.2892, + "step": 3450 + }, + { + "epoch": 0.73, + "learning_rate": 7.37709220893295e-06, + "loss": 1.2841, + "step": 3451 + }, + { + "epoch": 0.73, + "learning_rate": 7.3665255361813125e-06, + "loss": 1.2524, + "step": 3452 + }, + { + "epoch": 0.73, + "learning_rate": 7.355964727947242e-06, + "loss": 1.2555, + "step": 3453 + }, + { + "epoch": 0.73, + "learning_rate": 7.3454097891331085e-06, + "loss": 1.2726, + "step": 3454 + }, + { + "epoch": 0.73, + "learning_rate": 7.334860724638555e-06, + "loss": 1.244, + "step": 3455 + }, + { + "epoch": 0.73, + "learning_rate": 7.3243175393605215e-06, + "loss": 1.2741, + "step": 3456 + }, + { + "epoch": 0.73, + "learning_rate": 7.313780238193195e-06, + "loss": 1.2557, + "step": 3457 + }, + { + "epoch": 0.73, + "learning_rate": 7.303248826028036e-06, + "loss": 1.1983, + "step": 3458 + }, + { + "epoch": 0.73, + "learning_rate": 7.292723307753784e-06, + "loss": 1.2621, + "step": 3459 + }, + { + "epoch": 0.73, + "learning_rate": 7.282203688256422e-06, + "loss": 1.2212, + "step": 3460 + }, + { + "epoch": 0.73, + "learning_rate": 7.27168997241922e-06, + "loss": 1.2561, + "step": 3461 + }, + { + "epoch": 0.73, + "learning_rate": 7.261182165122689e-06, + "loss": 1.27, + "step": 3462 + }, + { + "epoch": 0.73, + "learning_rate": 7.250680271244593e-06, + "loss": 1.2527, + "step": 3463 + }, + { + "epoch": 0.73, + "learning_rate": 7.240184295659971e-06, + "loss": 1.2466, + "step": 3464 + }, + { + "epoch": 0.73, + "learning_rate": 7.229694243241097e-06, + "loss": 1.2089, + "step": 3465 + }, + { + "epoch": 0.73, + "learning_rate": 7.219210118857509e-06, + "loss": 1.2399, + "step": 3466 + }, + { + "epoch": 0.73, + "learning_rate": 7.208731927375982e-06, + "loss": 1.2491, + "step": 3467 + }, + { + "epoch": 0.73, + "learning_rate": 7.198259673660535e-06, + "loss": 1.2486, + "step": 3468 + }, + { + "epoch": 0.73, + "learning_rate": 7.187793362572451e-06, + "loss": 1.2738, + "step": 3469 + }, + { + "epoch": 0.73, + "learning_rate": 7.17733299897023e-06, + "loss": 1.2042, + "step": 3470 + }, + { + "epoch": 0.73, + "learning_rate": 7.166878587709618e-06, + "loss": 1.2367, + "step": 3471 + }, + { + "epoch": 0.73, + "learning_rate": 7.156430133643613e-06, + "loss": 1.257, + "step": 3472 + }, + { + "epoch": 0.73, + "learning_rate": 7.145987641622423e-06, + "loss": 1.2147, + "step": 3473 + }, + { + "epoch": 0.73, + "learning_rate": 7.1355511164935085e-06, + "loss": 1.202, + "step": 3474 + }, + { + "epoch": 0.73, + "learning_rate": 7.125120563101562e-06, + "loss": 1.2162, + "step": 3475 + }, + { + "epoch": 0.73, + "learning_rate": 7.114695986288476e-06, + "loss": 1.2245, + "step": 3476 + }, + { + "epoch": 0.73, + "learning_rate": 7.104277390893404e-06, + "loss": 1.2367, + "step": 3477 + }, + { + "epoch": 0.73, + "learning_rate": 7.0938647817527014e-06, + "loss": 1.235, + "step": 3478 + }, + { + "epoch": 0.73, + "learning_rate": 7.083458163699939e-06, + "loss": 1.2113, + "step": 3479 + }, + { + "epoch": 0.73, + "learning_rate": 7.073057541565933e-06, + "loss": 1.2623, + "step": 3480 + }, + { + "epoch": 0.73, + "learning_rate": 7.062662920178689e-06, + "loss": 1.2144, + "step": 3481 + }, + { + "epoch": 0.73, + "learning_rate": 7.052274304363449e-06, + "loss": 1.2208, + "step": 3482 + }, + { + "epoch": 0.73, + "learning_rate": 7.041891698942649e-06, + "loss": 1.2248, + "step": 3483 + }, + { + "epoch": 0.73, + "learning_rate": 7.03151510873594e-06, + "loss": 1.2647, + "step": 3484 + }, + { + "epoch": 0.73, + "learning_rate": 7.021144538560194e-06, + "loss": 1.2, + "step": 3485 + }, + { + "epoch": 0.73, + "learning_rate": 7.010779993229471e-06, + "loss": 1.2185, + "step": 3486 + }, + { + "epoch": 0.73, + "learning_rate": 7.000421477555038e-06, + "loss": 1.2572, + "step": 3487 + }, + { + "epoch": 0.73, + "learning_rate": 6.9900689963453734e-06, + "loss": 1.2395, + "step": 3488 + }, + { + "epoch": 0.73, + "learning_rate": 6.9797225544061385e-06, + "loss": 1.2401, + "step": 3489 + }, + { + "epoch": 0.73, + "learning_rate": 6.969382156540212e-06, + "loss": 1.2825, + "step": 3490 + }, + { + "epoch": 0.73, + "learning_rate": 6.9590478075476475e-06, + "loss": 1.2534, + "step": 3491 + }, + { + "epoch": 0.73, + "learning_rate": 6.9487195122256925e-06, + "loss": 1.2292, + "step": 3492 + }, + { + "epoch": 0.73, + "learning_rate": 6.9383972753688e-06, + "loss": 1.2419, + "step": 3493 + }, + { + "epoch": 0.73, + "learning_rate": 6.928081101768589e-06, + "loss": 1.2715, + "step": 3494 + }, + { + "epoch": 0.74, + "learning_rate": 6.9177709962138905e-06, + "loss": 1.2377, + "step": 3495 + }, + { + "epoch": 0.74, + "learning_rate": 6.907466963490692e-06, + "loss": 1.2476, + "step": 3496 + }, + { + "epoch": 0.74, + "learning_rate": 6.897169008382172e-06, + "loss": 1.2539, + "step": 3497 + }, + { + "epoch": 0.74, + "learning_rate": 6.88687713566869e-06, + "loss": 1.2817, + "step": 3498 + }, + { + "epoch": 0.74, + "learning_rate": 6.876591350127795e-06, + "loss": 1.2159, + "step": 3499 + }, + { + "epoch": 0.74, + "learning_rate": 6.866311656534177e-06, + "loss": 1.2417, + "step": 3500 + }, + { + "epoch": 0.74, + "learning_rate": 6.856038059659731e-06, + "loss": 1.2626, + "step": 3501 + }, + { + "epoch": 0.74, + "learning_rate": 6.8457705642734994e-06, + "loss": 1.2404, + "step": 3502 + }, + { + "epoch": 0.74, + "learning_rate": 6.835509175141713e-06, + "loss": 1.2234, + "step": 3503 + }, + { + "epoch": 0.74, + "learning_rate": 6.825253897027746e-06, + "loss": 1.2569, + "step": 3504 + }, + { + "epoch": 0.74, + "learning_rate": 6.815004734692146e-06, + "loss": 1.2727, + "step": 3505 + }, + { + "epoch": 0.74, + "learning_rate": 6.804761692892627e-06, + "loss": 1.2193, + "step": 3506 + }, + { + "epoch": 0.74, + "learning_rate": 6.794524776384059e-06, + "loss": 1.2128, + "step": 3507 + }, + { + "epoch": 0.74, + "learning_rate": 6.784293989918454e-06, + "loss": 1.2532, + "step": 3508 + }, + { + "epoch": 0.74, + "learning_rate": 6.774069338245002e-06, + "loss": 1.2278, + "step": 3509 + }, + { + "epoch": 0.74, + "learning_rate": 6.763850826110025e-06, + "loss": 1.2912, + "step": 3510 + }, + { + "epoch": 0.74, + "learning_rate": 6.753638458257017e-06, + "loss": 1.2289, + "step": 3511 + }, + { + "epoch": 0.74, + "learning_rate": 6.743432239426599e-06, + "loss": 1.2061, + "step": 3512 + }, + { + "epoch": 0.74, + "learning_rate": 6.733232174356537e-06, + "loss": 1.182, + "step": 3513 + }, + { + "epoch": 0.74, + "learning_rate": 6.723038267781763e-06, + "loss": 1.2255, + "step": 3514 + }, + { + "epoch": 0.74, + "learning_rate": 6.712850524434329e-06, + "loss": 1.2829, + "step": 3515 + }, + { + "epoch": 0.74, + "learning_rate": 6.7026689490434275e-06, + "loss": 1.2193, + "step": 3516 + }, + { + "epoch": 0.74, + "learning_rate": 6.692493546335404e-06, + "loss": 1.2078, + "step": 3517 + }, + { + "epoch": 0.74, + "learning_rate": 6.682324321033715e-06, + "loss": 1.215, + "step": 3518 + }, + { + "epoch": 0.74, + "learning_rate": 6.672161277858977e-06, + "loss": 1.2736, + "step": 3519 + }, + { + "epoch": 0.74, + "learning_rate": 6.662004421528909e-06, + "loss": 1.1534, + "step": 3520 + }, + { + "epoch": 0.74, + "learning_rate": 6.651853756758382e-06, + "loss": 1.2636, + "step": 3521 + }, + { + "epoch": 0.74, + "learning_rate": 6.641709288259368e-06, + "loss": 1.2854, + "step": 3522 + }, + { + "epoch": 0.74, + "learning_rate": 6.6315710207409925e-06, + "loss": 1.2688, + "step": 3523 + }, + { + "epoch": 0.74, + "learning_rate": 6.621438958909472e-06, + "loss": 1.2235, + "step": 3524 + }, + { + "epoch": 0.74, + "learning_rate": 6.6113131074681694e-06, + "loss": 1.2121, + "step": 3525 + }, + { + "epoch": 0.74, + "learning_rate": 6.6011934711175395e-06, + "loss": 1.2806, + "step": 3526 + }, + { + "epoch": 0.74, + "learning_rate": 6.591080054555177e-06, + "loss": 1.2419, + "step": 3527 + }, + { + "epoch": 0.74, + "learning_rate": 6.580972862475769e-06, + "loss": 1.2342, + "step": 3528 + }, + { + "epoch": 0.74, + "learning_rate": 6.570871899571119e-06, + "loss": 1.2547, + "step": 3529 + }, + { + "epoch": 0.74, + "learning_rate": 6.56077717053015e-06, + "loss": 1.214, + "step": 3530 + }, + { + "epoch": 0.74, + "learning_rate": 6.550688680038871e-06, + "loss": 1.2362, + "step": 3531 + }, + { + "epoch": 0.74, + "learning_rate": 6.5406064327804165e-06, + "loss": 1.2625, + "step": 3532 + }, + { + "epoch": 0.74, + "learning_rate": 6.5305304334350075e-06, + "loss": 1.2325, + "step": 3533 + }, + { + "epoch": 0.74, + "learning_rate": 6.520460686679964e-06, + "loss": 1.2307, + "step": 3534 + }, + { + "epoch": 0.74, + "learning_rate": 6.510397197189724e-06, + "loss": 1.2151, + "step": 3535 + }, + { + "epoch": 0.74, + "learning_rate": 6.500339969635794e-06, + "loss": 1.1559, + "step": 3536 + }, + { + "epoch": 0.74, + "learning_rate": 6.490289008686786e-06, + "loss": 1.2368, + "step": 3537 + }, + { + "epoch": 0.74, + "learning_rate": 6.480244319008411e-06, + "loss": 1.2878, + "step": 3538 + }, + { + "epoch": 0.74, + "learning_rate": 6.470205905263449e-06, + "loss": 1.2264, + "step": 3539 + }, + { + "epoch": 0.74, + "learning_rate": 6.460173772111791e-06, + "loss": 1.2734, + "step": 3540 + }, + { + "epoch": 0.74, + "learning_rate": 6.450147924210395e-06, + "loss": 1.1985, + "step": 3541 + }, + { + "epoch": 0.75, + "learning_rate": 6.440128366213297e-06, + "loss": 1.227, + "step": 3542 + }, + { + "epoch": 0.75, + "learning_rate": 6.430115102771637e-06, + "loss": 1.2165, + "step": 3543 + }, + { + "epoch": 0.75, + "learning_rate": 6.420108138533607e-06, + "loss": 1.2112, + "step": 3544 + }, + { + "epoch": 0.75, + "learning_rate": 6.410107478144496e-06, + "loss": 1.2751, + "step": 3545 + }, + { + "epoch": 0.75, + "learning_rate": 6.400113126246645e-06, + "loss": 1.2595, + "step": 3546 + }, + { + "epoch": 0.75, + "learning_rate": 6.390125087479493e-06, + "loss": 1.2717, + "step": 3547 + }, + { + "epoch": 0.75, + "learning_rate": 6.380143366479521e-06, + "loss": 1.249, + "step": 3548 + }, + { + "epoch": 0.75, + "learning_rate": 6.370167967880303e-06, + "loss": 1.1961, + "step": 3549 + }, + { + "epoch": 0.75, + "learning_rate": 6.360198896312451e-06, + "loss": 1.2486, + "step": 3550 + }, + { + "epoch": 0.75, + "learning_rate": 6.350236156403666e-06, + "loss": 1.2977, + "step": 3551 + }, + { + "epoch": 0.75, + "learning_rate": 6.3402797527786904e-06, + "loss": 1.1871, + "step": 3552 + }, + { + "epoch": 0.75, + "learning_rate": 6.330329690059342e-06, + "loss": 1.3087, + "step": 3553 + }, + { + "epoch": 0.75, + "learning_rate": 6.32038597286448e-06, + "loss": 1.201, + "step": 3554 + }, + { + "epoch": 0.75, + "learning_rate": 6.31044860581002e-06, + "loss": 1.248, + "step": 3555 + }, + { + "epoch": 0.75, + "learning_rate": 6.300517593508944e-06, + "loss": 1.1977, + "step": 3556 + }, + { + "epoch": 0.75, + "learning_rate": 6.290592940571269e-06, + "loss": 1.2783, + "step": 3557 + }, + { + "epoch": 0.75, + "learning_rate": 6.280674651604059e-06, + "loss": 1.2428, + "step": 3558 + }, + { + "epoch": 0.75, + "learning_rate": 6.270762731211442e-06, + "loss": 1.2188, + "step": 3559 + }, + { + "epoch": 0.75, + "learning_rate": 6.260857183994564e-06, + "loss": 1.2497, + "step": 3560 + }, + { + "epoch": 0.75, + "learning_rate": 6.25095801455164e-06, + "loss": 1.2437, + "step": 3561 + }, + { + "epoch": 0.75, + "learning_rate": 6.241065227477905e-06, + "loss": 1.2495, + "step": 3562 + }, + { + "epoch": 0.75, + "learning_rate": 6.231178827365627e-06, + "loss": 1.2344, + "step": 3563 + }, + { + "epoch": 0.75, + "learning_rate": 6.221298818804136e-06, + "loss": 1.2486, + "step": 3564 + }, + { + "epoch": 0.75, + "learning_rate": 6.211425206379769e-06, + "loss": 1.225, + "step": 3565 + }, + { + "epoch": 0.75, + "learning_rate": 6.201557994675895e-06, + "loss": 1.2596, + "step": 3566 + }, + { + "epoch": 0.75, + "learning_rate": 6.191697188272933e-06, + "loss": 1.2276, + "step": 3567 + }, + { + "epoch": 0.75, + "learning_rate": 6.181842791748307e-06, + "loss": 1.2601, + "step": 3568 + }, + { + "epoch": 0.75, + "learning_rate": 6.17199480967648e-06, + "loss": 1.2197, + "step": 3569 + }, + { + "epoch": 0.75, + "learning_rate": 6.162153246628921e-06, + "loss": 1.2276, + "step": 3570 + }, + { + "epoch": 0.75, + "learning_rate": 6.152318107174144e-06, + "loss": 1.2539, + "step": 3571 + }, + { + "epoch": 0.75, + "learning_rate": 6.142489395877651e-06, + "loss": 1.2383, + "step": 3572 + }, + { + "epoch": 0.75, + "learning_rate": 6.132667117301989e-06, + "loss": 1.2276, + "step": 3573 + }, + { + "epoch": 0.75, + "learning_rate": 6.122851276006692e-06, + "loss": 1.2338, + "step": 3574 + }, + { + "epoch": 0.75, + "learning_rate": 6.113041876548333e-06, + "loss": 1.2444, + "step": 3575 + }, + { + "epoch": 0.75, + "learning_rate": 6.103238923480468e-06, + "loss": 1.2727, + "step": 3576 + }, + { + "epoch": 0.75, + "learning_rate": 6.093442421353683e-06, + "loss": 1.2161, + "step": 3577 + }, + { + "epoch": 0.75, + "learning_rate": 6.083652374715561e-06, + "loss": 1.2587, + "step": 3578 + }, + { + "epoch": 0.75, + "learning_rate": 6.073868788110673e-06, + "loss": 1.2926, + "step": 3579 + }, + { + "epoch": 0.75, + "learning_rate": 6.064091666080621e-06, + "loss": 1.2441, + "step": 3580 + }, + { + "epoch": 0.75, + "learning_rate": 6.054321013163978e-06, + "loss": 1.2681, + "step": 3581 + }, + { + "epoch": 0.75, + "learning_rate": 6.044556833896338e-06, + "loss": 1.2571, + "step": 3582 + }, + { + "epoch": 0.75, + "learning_rate": 6.034799132810274e-06, + "loss": 1.2116, + "step": 3583 + }, + { + "epoch": 0.75, + "learning_rate": 6.025047914435349e-06, + "loss": 1.2729, + "step": 3584 + }, + { + "epoch": 0.75, + "learning_rate": 6.015303183298135e-06, + "loss": 1.2212, + "step": 3585 + }, + { + "epoch": 0.75, + "learning_rate": 6.005564943922179e-06, + "loss": 1.2731, + "step": 3586 + }, + { + "epoch": 0.75, + "learning_rate": 5.995833200828007e-06, + "loss": 1.2619, + "step": 3587 + }, + { + "epoch": 0.75, + "learning_rate": 5.9861079585331535e-06, + "loss": 1.2284, + "step": 3588 + }, + { + "epoch": 0.75, + "learning_rate": 5.97638922155211e-06, + "loss": 1.2559, + "step": 3589 + }, + { + "epoch": 0.76, + "learning_rate": 5.9666769943963675e-06, + "loss": 1.3126, + "step": 3590 + }, + { + "epoch": 0.76, + "learning_rate": 5.956971281574386e-06, + "loss": 1.2676, + "step": 3591 + }, + { + "epoch": 0.76, + "learning_rate": 5.947272087591591e-06, + "loss": 1.2489, + "step": 3592 + }, + { + "epoch": 0.76, + "learning_rate": 5.9375794169504095e-06, + "loss": 1.2458, + "step": 3593 + }, + { + "epoch": 0.76, + "learning_rate": 5.927893274150214e-06, + "loss": 1.192, + "step": 3594 + }, + { + "epoch": 0.76, + "learning_rate": 5.918213663687362e-06, + "loss": 1.2844, + "step": 3595 + }, + { + "epoch": 0.76, + "learning_rate": 5.908540590055168e-06, + "loss": 1.2122, + "step": 3596 + }, + { + "epoch": 0.76, + "learning_rate": 5.898874057743926e-06, + "loss": 1.2162, + "step": 3597 + }, + { + "epoch": 0.76, + "learning_rate": 5.889214071240876e-06, + "loss": 1.2739, + "step": 3598 + }, + { + "epoch": 0.76, + "learning_rate": 5.879560635030242e-06, + "loss": 1.3043, + "step": 3599 + }, + { + "epoch": 0.76, + "learning_rate": 5.869913753593175e-06, + "loss": 1.2569, + "step": 3600 + }, + { + "epoch": 0.76, + "learning_rate": 5.860273431407821e-06, + "loss": 1.1974, + "step": 3601 + }, + { + "epoch": 0.76, + "learning_rate": 5.8506396729492455e-06, + "loss": 1.236, + "step": 3602 + }, + { + "epoch": 0.76, + "learning_rate": 5.841012482689501e-06, + "loss": 1.2349, + "step": 3603 + }, + { + "epoch": 0.76, + "learning_rate": 5.831391865097564e-06, + "loss": 1.191, + "step": 3604 + }, + { + "epoch": 0.76, + "learning_rate": 5.821777824639365e-06, + "loss": 1.2687, + "step": 3605 + }, + { + "epoch": 0.76, + "learning_rate": 5.812170365777801e-06, + "loss": 1.2637, + "step": 3606 + }, + { + "epoch": 0.76, + "learning_rate": 5.802569492972687e-06, + "loss": 1.2613, + "step": 3607 + }, + { + "epoch": 0.76, + "learning_rate": 5.792975210680793e-06, + "loss": 1.2483, + "step": 3608 + }, + { + "epoch": 0.76, + "learning_rate": 5.783387523355839e-06, + "loss": 1.2237, + "step": 3609 + }, + { + "epoch": 0.76, + "learning_rate": 5.773806435448459e-06, + "loss": 1.2205, + "step": 3610 + }, + { + "epoch": 0.76, + "learning_rate": 5.764231951406256e-06, + "loss": 1.2307, + "step": 3611 + }, + { + "epoch": 0.76, + "learning_rate": 5.75466407567374e-06, + "loss": 1.2022, + "step": 3612 + }, + { + "epoch": 0.76, + "learning_rate": 5.745102812692358e-06, + "loss": 1.194, + "step": 3613 + }, + { + "epoch": 0.76, + "learning_rate": 5.735548166900506e-06, + "loss": 1.232, + "step": 3614 + }, + { + "epoch": 0.76, + "learning_rate": 5.72600014273349e-06, + "loss": 1.2266, + "step": 3615 + }, + { + "epoch": 0.76, + "learning_rate": 5.716458744623536e-06, + "loss": 1.223, + "step": 3616 + }, + { + "epoch": 0.76, + "learning_rate": 5.706923976999825e-06, + "loss": 1.2352, + "step": 3617 + }, + { + "epoch": 0.76, + "learning_rate": 5.697395844288423e-06, + "loss": 1.2387, + "step": 3618 + }, + { + "epoch": 0.76, + "learning_rate": 5.687874350912346e-06, + "loss": 1.2801, + "step": 3619 + }, + { + "epoch": 0.76, + "learning_rate": 5.678359501291504e-06, + "loss": 1.2382, + "step": 3620 + }, + { + "epoch": 0.76, + "learning_rate": 5.668851299842739e-06, + "loss": 1.2551, + "step": 3621 + }, + { + "epoch": 0.76, + "learning_rate": 5.659349750979814e-06, + "loss": 1.2399, + "step": 3622 + }, + { + "epoch": 0.76, + "learning_rate": 5.6498548591133725e-06, + "loss": 1.2322, + "step": 3623 + }, + { + "epoch": 0.76, + "learning_rate": 5.6403666286510065e-06, + "loss": 1.2039, + "step": 3624 + }, + { + "epoch": 0.76, + "learning_rate": 5.630885063997187e-06, + "loss": 1.2529, + "step": 3625 + }, + { + "epoch": 0.76, + "learning_rate": 5.621410169553292e-06, + "loss": 1.2361, + "step": 3626 + }, + { + "epoch": 0.76, + "learning_rate": 5.6119419497176275e-06, + "loss": 1.192, + "step": 3627 + }, + { + "epoch": 0.76, + "learning_rate": 5.6024804088853775e-06, + "loss": 1.2415, + "step": 3628 + }, + { + "epoch": 0.76, + "learning_rate": 5.59302555144863e-06, + "loss": 1.2426, + "step": 3629 + }, + { + "epoch": 0.76, + "learning_rate": 5.5835773817963814e-06, + "loss": 1.2766, + "step": 3630 + }, + { + "epoch": 0.76, + "learning_rate": 5.574135904314504e-06, + "loss": 1.253, + "step": 3631 + }, + { + "epoch": 0.76, + "learning_rate": 5.5647011233857915e-06, + "loss": 1.205, + "step": 3632 + }, + { + "epoch": 0.76, + "learning_rate": 5.555273043389906e-06, + "loss": 1.2142, + "step": 3633 + }, + { + "epoch": 0.76, + "learning_rate": 5.545851668703397e-06, + "loss": 1.2438, + "step": 3634 + }, + { + "epoch": 0.76, + "learning_rate": 5.536437003699724e-06, + "loss": 1.2575, + "step": 3635 + }, + { + "epoch": 0.76, + "learning_rate": 5.527029052749216e-06, + "loss": 1.1944, + "step": 3636 + }, + { + "epoch": 0.76, + "learning_rate": 5.517627820219076e-06, + "loss": 1.2532, + "step": 3637 + }, + { + "epoch": 0.77, + "learning_rate": 5.508233310473412e-06, + "loss": 1.2462, + "step": 3638 + }, + { + "epoch": 0.77, + "learning_rate": 5.498845527873193e-06, + "loss": 1.2677, + "step": 3639 + }, + { + "epoch": 0.77, + "learning_rate": 5.489464476776276e-06, + "loss": 1.2286, + "step": 3640 + }, + { + "epoch": 0.77, + "learning_rate": 5.480090161537388e-06, + "loss": 1.196, + "step": 3641 + }, + { + "epoch": 0.77, + "learning_rate": 5.470722586508122e-06, + "loss": 1.2695, + "step": 3642 + }, + { + "epoch": 0.77, + "learning_rate": 5.4613617560369625e-06, + "loss": 1.2407, + "step": 3643 + }, + { + "epoch": 0.77, + "learning_rate": 5.452007674469235e-06, + "loss": 1.2691, + "step": 3644 + }, + { + "epoch": 0.77, + "learning_rate": 5.442660346147157e-06, + "loss": 1.2453, + "step": 3645 + }, + { + "epoch": 0.77, + "learning_rate": 5.433319775409807e-06, + "loss": 1.2642, + "step": 3646 + }, + { + "epoch": 0.77, + "learning_rate": 5.4239859665931105e-06, + "loss": 1.254, + "step": 3647 + }, + { + "epoch": 0.77, + "learning_rate": 5.4146589240298745e-06, + "loss": 1.2766, + "step": 3648 + }, + { + "epoch": 0.77, + "learning_rate": 5.405338652049749e-06, + "loss": 1.2345, + "step": 3649 + }, + { + "epoch": 0.77, + "learning_rate": 5.396025154979247e-06, + "loss": 1.179, + "step": 3650 + }, + { + "epoch": 0.77, + "learning_rate": 5.386718437141743e-06, + "loss": 1.1928, + "step": 3651 + }, + { + "epoch": 0.77, + "learning_rate": 5.37741850285745e-06, + "loss": 1.2355, + "step": 3652 + }, + { + "epoch": 0.77, + "learning_rate": 5.368125356443452e-06, + "loss": 1.2487, + "step": 3653 + }, + { + "epoch": 0.77, + "learning_rate": 5.358839002213665e-06, + "loss": 1.2252, + "step": 3654 + }, + { + "epoch": 0.77, + "learning_rate": 5.349559444478849e-06, + "loss": 1.2632, + "step": 3655 + }, + { + "epoch": 0.77, + "learning_rate": 5.3402866875466344e-06, + "loss": 1.2128, + "step": 3656 + }, + { + "epoch": 0.77, + "learning_rate": 5.331020735721469e-06, + "loss": 1.2661, + "step": 3657 + }, + { + "epoch": 0.77, + "learning_rate": 5.321761593304646e-06, + "loss": 1.2443, + "step": 3658 + }, + { + "epoch": 0.77, + "learning_rate": 5.312509264594312e-06, + "loss": 1.1673, + "step": 3659 + }, + { + "epoch": 0.77, + "learning_rate": 5.303263753885433e-06, + "loss": 1.2504, + "step": 3660 + }, + { + "epoch": 0.77, + "learning_rate": 5.294025065469827e-06, + "loss": 1.2509, + "step": 3661 + }, + { + "epoch": 0.77, + "learning_rate": 5.284793203636132e-06, + "loss": 1.3018, + "step": 3662 + }, + { + "epoch": 0.77, + "learning_rate": 5.2755681726698134e-06, + "loss": 1.2068, + "step": 3663 + }, + { + "epoch": 0.77, + "learning_rate": 5.2663499768531865e-06, + "loss": 1.2447, + "step": 3664 + }, + { + "epoch": 0.77, + "learning_rate": 5.257138620465374e-06, + "loss": 1.1832, + "step": 3665 + }, + { + "epoch": 0.77, + "learning_rate": 5.247934107782324e-06, + "loss": 1.2177, + "step": 3666 + }, + { + "epoch": 0.77, + "learning_rate": 5.238736443076828e-06, + "loss": 1.2891, + "step": 3667 + }, + { + "epoch": 0.77, + "learning_rate": 5.2295456306184715e-06, + "loss": 1.2142, + "step": 3668 + }, + { + "epoch": 0.77, + "learning_rate": 5.220361674673677e-06, + "loss": 1.2576, + "step": 3669 + }, + { + "epoch": 0.77, + "learning_rate": 5.211184579505688e-06, + "loss": 1.2489, + "step": 3670 + }, + { + "epoch": 0.77, + "learning_rate": 5.2020143493745425e-06, + "loss": 1.2028, + "step": 3671 + }, + { + "epoch": 0.77, + "learning_rate": 5.192850988537115e-06, + "loss": 1.2788, + "step": 3672 + }, + { + "epoch": 0.77, + "learning_rate": 5.183694501247072e-06, + "loss": 1.2287, + "step": 3673 + }, + { + "epoch": 0.77, + "learning_rate": 5.174544891754896e-06, + "loss": 1.2406, + "step": 3674 + }, + { + "epoch": 0.77, + "learning_rate": 5.165402164307884e-06, + "loss": 1.2041, + "step": 3675 + }, + { + "epoch": 0.77, + "learning_rate": 5.15626632315013e-06, + "loss": 1.257, + "step": 3676 + }, + { + "epoch": 0.77, + "learning_rate": 5.147137372522537e-06, + "loss": 1.2025, + "step": 3677 + }, + { + "epoch": 0.77, + "learning_rate": 5.138015316662803e-06, + "loss": 1.2163, + "step": 3678 + }, + { + "epoch": 0.77, + "learning_rate": 5.128900159805425e-06, + "loss": 1.2846, + "step": 3679 + }, + { + "epoch": 0.77, + "learning_rate": 5.119791906181713e-06, + "loss": 1.2558, + "step": 3680 + }, + { + "epoch": 0.77, + "learning_rate": 5.110690560019744e-06, + "loss": 1.2866, + "step": 3681 + }, + { + "epoch": 0.77, + "learning_rate": 5.1015961255444235e-06, + "loss": 1.2177, + "step": 3682 + }, + { + "epoch": 0.77, + "learning_rate": 5.09250860697742e-06, + "loss": 1.2401, + "step": 3683 + }, + { + "epoch": 0.77, + "learning_rate": 5.083428008537197e-06, + "loss": 1.259, + "step": 3684 + }, + { + "epoch": 0.78, + "learning_rate": 5.074354334439022e-06, + "loss": 1.2806, + "step": 3685 + }, + { + "epoch": 0.78, + "learning_rate": 5.065287588894933e-06, + "loss": 1.1889, + "step": 3686 + }, + { + "epoch": 0.78, + "learning_rate": 5.056227776113747e-06, + "loss": 1.2237, + "step": 3687 + }, + { + "epoch": 0.78, + "learning_rate": 5.0471749003010835e-06, + "loss": 1.2335, + "step": 3688 + }, + { + "epoch": 0.78, + "learning_rate": 5.038128965659317e-06, + "loss": 1.2801, + "step": 3689 + }, + { + "epoch": 0.78, + "learning_rate": 5.029089976387627e-06, + "loss": 1.2576, + "step": 3690 + }, + { + "epoch": 0.78, + "learning_rate": 5.020057936681939e-06, + "loss": 1.2338, + "step": 3691 + }, + { + "epoch": 0.78, + "learning_rate": 5.011032850734983e-06, + "loss": 1.2521, + "step": 3692 + }, + { + "epoch": 0.78, + "learning_rate": 5.00201472273623e-06, + "loss": 1.2056, + "step": 3693 + }, + { + "epoch": 0.78, + "learning_rate": 4.993003556871954e-06, + "loss": 1.2287, + "step": 3694 + }, + { + "epoch": 0.78, + "learning_rate": 4.983999357325164e-06, + "loss": 1.1885, + "step": 3695 + }, + { + "epoch": 0.78, + "learning_rate": 4.975002128275666e-06, + "loss": 1.2171, + "step": 3696 + }, + { + "epoch": 0.78, + "learning_rate": 4.966011873900001e-06, + "loss": 1.2356, + "step": 3697 + }, + { + "epoch": 0.78, + "learning_rate": 4.957028598371498e-06, + "loss": 1.2419, + "step": 3698 + }, + { + "epoch": 0.78, + "learning_rate": 4.948052305860233e-06, + "loss": 1.2049, + "step": 3699 + }, + { + "epoch": 0.78, + "learning_rate": 4.939083000533036e-06, + "loss": 1.2135, + "step": 3700 + }, + { + "epoch": 0.78, + "learning_rate": 4.93012068655351e-06, + "loss": 1.1876, + "step": 3701 + }, + { + "epoch": 0.78, + "learning_rate": 4.92116536808199e-06, + "loss": 1.2686, + "step": 3702 + }, + { + "epoch": 0.78, + "learning_rate": 4.912217049275594e-06, + "loss": 1.2107, + "step": 3703 + }, + { + "epoch": 0.78, + "learning_rate": 4.903275734288162e-06, + "loss": 1.2027, + "step": 3704 + }, + { + "epoch": 0.78, + "learning_rate": 4.8943414272702886e-06, + "loss": 1.2154, + "step": 3705 + }, + { + "epoch": 0.78, + "learning_rate": 4.885414132369335e-06, + "loss": 1.2347, + "step": 3706 + }, + { + "epoch": 0.78, + "learning_rate": 4.876493853729385e-06, + "loss": 1.218, + "step": 3707 + }, + { + "epoch": 0.78, + "learning_rate": 4.867580595491268e-06, + "loss": 1.2118, + "step": 3708 + }, + { + "epoch": 0.78, + "learning_rate": 4.858674361792571e-06, + "loss": 1.2323, + "step": 3709 + }, + { + "epoch": 0.78, + "learning_rate": 4.849775156767598e-06, + "loss": 1.2283, + "step": 3710 + }, + { + "epoch": 0.78, + "learning_rate": 4.840882984547415e-06, + "loss": 1.2607, + "step": 3711 + }, + { + "epoch": 0.78, + "learning_rate": 4.8319978492598e-06, + "loss": 1.2752, + "step": 3712 + }, + { + "epoch": 0.78, + "learning_rate": 4.823119755029271e-06, + "loss": 1.2268, + "step": 3713 + }, + { + "epoch": 0.78, + "learning_rate": 4.814248705977092e-06, + "loss": 1.2078, + "step": 3714 + }, + { + "epoch": 0.78, + "learning_rate": 4.805384706221232e-06, + "loss": 1.2239, + "step": 3715 + }, + { + "epoch": 0.78, + "learning_rate": 4.796527759876415e-06, + "loss": 1.199, + "step": 3716 + }, + { + "epoch": 0.78, + "learning_rate": 4.787677871054062e-06, + "loss": 1.2697, + "step": 3717 + }, + { + "epoch": 0.78, + "learning_rate": 4.7788350438623465e-06, + "loss": 1.2864, + "step": 3718 + }, + { + "epoch": 0.78, + "learning_rate": 4.769999282406137e-06, + "loss": 1.2448, + "step": 3719 + }, + { + "epoch": 0.78, + "learning_rate": 4.7611705907870474e-06, + "loss": 1.2648, + "step": 3720 + }, + { + "epoch": 0.78, + "learning_rate": 4.7523489731033845e-06, + "loss": 1.2404, + "step": 3721 + }, + { + "epoch": 0.78, + "learning_rate": 4.743534433450199e-06, + "loss": 1.2325, + "step": 3722 + }, + { + "epoch": 0.78, + "learning_rate": 4.734726975919233e-06, + "loss": 1.2223, + "step": 3723 + }, + { + "epoch": 0.78, + "learning_rate": 4.725926604598942e-06, + "loss": 1.2159, + "step": 3724 + }, + { + "epoch": 0.78, + "learning_rate": 4.7171333235745145e-06, + "loss": 1.24, + "step": 3725 + }, + { + "epoch": 0.78, + "learning_rate": 4.708347136927818e-06, + "loss": 1.2243, + "step": 3726 + }, + { + "epoch": 0.78, + "learning_rate": 4.699568048737453e-06, + "loss": 1.2913, + "step": 3727 + }, + { + "epoch": 0.78, + "learning_rate": 4.690796063078709e-06, + "loss": 1.1972, + "step": 3728 + }, + { + "epoch": 0.78, + "learning_rate": 4.6820311840235745e-06, + "loss": 1.2271, + "step": 3729 + }, + { + "epoch": 0.78, + "learning_rate": 4.67327341564076e-06, + "loss": 1.2028, + "step": 3730 + }, + { + "epoch": 0.78, + "learning_rate": 4.6645227619956515e-06, + "loss": 1.2191, + "step": 3731 + }, + { + "epoch": 0.78, + "learning_rate": 4.655779227150352e-06, + "loss": 1.2429, + "step": 3732 + }, + { + "epoch": 0.79, + "learning_rate": 4.647042815163649e-06, + "loss": 1.1935, + "step": 3733 + }, + { + "epoch": 0.79, + "learning_rate": 4.638313530091016e-06, + "loss": 1.2538, + "step": 3734 + }, + { + "epoch": 0.79, + "learning_rate": 4.629591375984641e-06, + "loss": 1.2513, + "step": 3735 + }, + { + "epoch": 0.79, + "learning_rate": 4.620876356893385e-06, + "loss": 1.204, + "step": 3736 + }, + { + "epoch": 0.79, + "learning_rate": 4.612168476862789e-06, + "loss": 1.2585, + "step": 3737 + }, + { + "epoch": 0.79, + "learning_rate": 4.603467739935108e-06, + "loss": 1.2116, + "step": 3738 + }, + { + "epoch": 0.79, + "learning_rate": 4.594774150149251e-06, + "loss": 1.2384, + "step": 3739 + }, + { + "epoch": 0.79, + "learning_rate": 4.586087711540832e-06, + "loss": 1.2116, + "step": 3740 + }, + { + "epoch": 0.79, + "learning_rate": 4.57740842814213e-06, + "loss": 1.2345, + "step": 3741 + }, + { + "epoch": 0.79, + "learning_rate": 4.568736303982115e-06, + "loss": 1.2151, + "step": 3742 + }, + { + "epoch": 0.79, + "learning_rate": 4.560071343086421e-06, + "loss": 1.2314, + "step": 3743 + }, + { + "epoch": 0.79, + "learning_rate": 4.55141354947737e-06, + "loss": 1.2186, + "step": 3744 + }, + { + "epoch": 0.79, + "learning_rate": 4.542762927173941e-06, + "loss": 1.2214, + "step": 3745 + }, + { + "epoch": 0.79, + "learning_rate": 4.534119480191801e-06, + "loss": 1.1697, + "step": 3746 + }, + { + "epoch": 0.79, + "learning_rate": 4.525483212543273e-06, + "loss": 1.2306, + "step": 3747 + }, + { + "epoch": 0.79, + "learning_rate": 4.516854128237358e-06, + "loss": 1.2209, + "step": 3748 + }, + { + "epoch": 0.79, + "learning_rate": 4.5082322312797166e-06, + "loss": 1.259, + "step": 3749 + }, + { + "epoch": 0.79, + "learning_rate": 4.499617525672664e-06, + "loss": 1.22, + "step": 3750 + }, + { + "epoch": 0.79, + "learning_rate": 4.491010015415198e-06, + "loss": 1.2254, + "step": 3751 + }, + { + "epoch": 0.79, + "learning_rate": 4.4824097045029615e-06, + "loss": 1.1593, + "step": 3752 + }, + { + "epoch": 0.79, + "learning_rate": 4.473816596928251e-06, + "loss": 1.2386, + "step": 3753 + }, + { + "epoch": 0.79, + "learning_rate": 4.465230696680038e-06, + "loss": 1.1959, + "step": 3754 + }, + { + "epoch": 0.79, + "learning_rate": 4.456652007743929e-06, + "loss": 1.2521, + "step": 3755 + }, + { + "epoch": 0.79, + "learning_rate": 4.448080534102202e-06, + "loss": 1.2475, + "step": 3756 + }, + { + "epoch": 0.79, + "learning_rate": 4.439516279733764e-06, + "loss": 1.2411, + "step": 3757 + }, + { + "epoch": 0.79, + "learning_rate": 4.430959248614184e-06, + "loss": 1.2321, + "step": 3758 + }, + { + "epoch": 0.79, + "learning_rate": 4.42240944471568e-06, + "loss": 1.2352, + "step": 3759 + }, + { + "epoch": 0.79, + "learning_rate": 4.413866872007104e-06, + "loss": 1.1946, + "step": 3760 + }, + { + "epoch": 0.79, + "learning_rate": 4.405331534453967e-06, + "loss": 1.2099, + "step": 3761 + }, + { + "epoch": 0.79, + "learning_rate": 4.396803436018406e-06, + "loss": 1.2542, + "step": 3762 + }, + { + "epoch": 0.79, + "learning_rate": 4.3882825806592024e-06, + "loss": 1.1668, + "step": 3763 + }, + { + "epoch": 0.79, + "learning_rate": 4.379768972331784e-06, + "loss": 1.3, + "step": 3764 + }, + { + "epoch": 0.79, + "learning_rate": 4.371262614988196e-06, + "loss": 1.275, + "step": 3765 + }, + { + "epoch": 0.79, + "learning_rate": 4.362763512577144e-06, + "loss": 1.2259, + "step": 3766 + }, + { + "epoch": 0.79, + "learning_rate": 4.354271669043934e-06, + "loss": 1.1798, + "step": 3767 + }, + { + "epoch": 0.79, + "learning_rate": 4.345787088330537e-06, + "loss": 1.2543, + "step": 3768 + }, + { + "epoch": 0.79, + "learning_rate": 4.33730977437552e-06, + "loss": 1.2457, + "step": 3769 + }, + { + "epoch": 0.79, + "learning_rate": 4.328839731114101e-06, + "loss": 1.2836, + "step": 3770 + }, + { + "epoch": 0.79, + "learning_rate": 4.3203769624781055e-06, + "loss": 1.2497, + "step": 3771 + }, + { + "epoch": 0.79, + "learning_rate": 4.311921472395999e-06, + "loss": 1.2518, + "step": 3772 + }, + { + "epoch": 0.79, + "learning_rate": 4.303473264792857e-06, + "loss": 1.2356, + "step": 3773 + }, + { + "epoch": 0.79, + "learning_rate": 4.295032343590366e-06, + "loss": 1.2162, + "step": 3774 + }, + { + "epoch": 0.79, + "learning_rate": 4.286598712706858e-06, + "loss": 1.243, + "step": 3775 + }, + { + "epoch": 0.79, + "learning_rate": 4.278172376057246e-06, + "loss": 1.2294, + "step": 3776 + }, + { + "epoch": 0.79, + "learning_rate": 4.269753337553091e-06, + "loss": 1.2745, + "step": 3777 + }, + { + "epoch": 0.79, + "learning_rate": 4.2613416011025424e-06, + "loss": 1.2469, + "step": 3778 + }, + { + "epoch": 0.79, + "learning_rate": 4.25293717061036e-06, + "loss": 1.2469, + "step": 3779 + }, + { + "epoch": 0.8, + "learning_rate": 4.244540049977934e-06, + "loss": 1.2199, + "step": 3780 + }, + { + "epoch": 0.8, + "learning_rate": 4.236150243103234e-06, + "loss": 1.2239, + "step": 3781 + }, + { + "epoch": 0.8, + "learning_rate": 4.227767753880861e-06, + "loss": 1.2538, + "step": 3782 + }, + { + "epoch": 0.8, + "learning_rate": 4.2193925862019934e-06, + "loss": 1.2101, + "step": 3783 + }, + { + "epoch": 0.8, + "learning_rate": 4.211024743954424e-06, + "loss": 1.2115, + "step": 3784 + }, + { + "epoch": 0.8, + "learning_rate": 4.2026642310225505e-06, + "loss": 1.2154, + "step": 3785 + }, + { + "epoch": 0.8, + "learning_rate": 4.194311051287359e-06, + "loss": 1.2042, + "step": 3786 + }, + { + "epoch": 0.8, + "learning_rate": 4.185965208626428e-06, + "loss": 1.2834, + "step": 3787 + }, + { + "epoch": 0.8, + "learning_rate": 4.177626706913948e-06, + "loss": 1.2421, + "step": 3788 + }, + { + "epoch": 0.8, + "learning_rate": 4.16929555002068e-06, + "loss": 1.2578, + "step": 3789 + }, + { + "epoch": 0.8, + "learning_rate": 4.160971741813995e-06, + "loss": 1.1894, + "step": 3790 + }, + { + "epoch": 0.8, + "learning_rate": 4.152655286157834e-06, + "loss": 1.2586, + "step": 3791 + }, + { + "epoch": 0.8, + "learning_rate": 4.144346186912738e-06, + "loss": 1.2576, + "step": 3792 + }, + { + "epoch": 0.8, + "learning_rate": 4.136044447935837e-06, + "loss": 1.3064, + "step": 3793 + }, + { + "epoch": 0.8, + "learning_rate": 4.127750073080829e-06, + "loss": 1.2095, + "step": 3794 + }, + { + "epoch": 0.8, + "learning_rate": 4.119463066197997e-06, + "loss": 1.2668, + "step": 3795 + }, + { + "epoch": 0.8, + "learning_rate": 4.111183431134223e-06, + "loss": 1.2512, + "step": 3796 + }, + { + "epoch": 0.8, + "learning_rate": 4.102911171732933e-06, + "loss": 1.2132, + "step": 3797 + }, + { + "epoch": 0.8, + "learning_rate": 4.094646291834166e-06, + "loss": 1.2339, + "step": 3798 + }, + { + "epoch": 0.8, + "learning_rate": 4.086388795274508e-06, + "loss": 1.2086, + "step": 3799 + }, + { + "epoch": 0.8, + "learning_rate": 4.078138685887125e-06, + "loss": 1.2069, + "step": 3800 + }, + { + "epoch": 0.8, + "learning_rate": 4.069895967501765e-06, + "loss": 1.1852, + "step": 3801 + }, + { + "epoch": 0.8, + "learning_rate": 4.0616606439447315e-06, + "loss": 1.2256, + "step": 3802 + }, + { + "epoch": 0.8, + "learning_rate": 4.053432719038895e-06, + "loss": 1.2168, + "step": 3803 + }, + { + "epoch": 0.8, + "learning_rate": 4.045212196603705e-06, + "loss": 1.2169, + "step": 3804 + }, + { + "epoch": 0.8, + "learning_rate": 4.03699908045516e-06, + "loss": 1.2411, + "step": 3805 + }, + { + "epoch": 0.8, + "learning_rate": 4.028793374405833e-06, + "loss": 1.2408, + "step": 3806 + }, + { + "epoch": 0.8, + "learning_rate": 4.020595082264847e-06, + "loss": 1.2576, + "step": 3807 + }, + { + "epoch": 0.8, + "learning_rate": 4.012404207837881e-06, + "loss": 1.2201, + "step": 3808 + }, + { + "epoch": 0.8, + "learning_rate": 4.0042207549271905e-06, + "loss": 1.2566, + "step": 3809 + }, + { + "epoch": 0.8, + "learning_rate": 3.996044727331558e-06, + "loss": 1.2368, + "step": 3810 + }, + { + "epoch": 0.8, + "learning_rate": 3.987876128846349e-06, + "loss": 1.2535, + "step": 3811 + }, + { + "epoch": 0.8, + "learning_rate": 3.979714963263455e-06, + "loss": 1.2385, + "step": 3812 + }, + { + "epoch": 0.8, + "learning_rate": 3.971561234371324e-06, + "loss": 1.2306, + "step": 3813 + }, + { + "epoch": 0.8, + "learning_rate": 3.963414945954962e-06, + "loss": 1.2366, + "step": 3814 + }, + { + "epoch": 0.8, + "learning_rate": 3.955276101795908e-06, + "loss": 1.2534, + "step": 3815 + }, + { + "epoch": 0.8, + "learning_rate": 3.947144705672257e-06, + "loss": 1.2208, + "step": 3816 + }, + { + "epoch": 0.8, + "learning_rate": 3.939020761358641e-06, + "loss": 1.241, + "step": 3817 + }, + { + "epoch": 0.8, + "learning_rate": 3.930904272626226e-06, + "loss": 1.2647, + "step": 3818 + }, + { + "epoch": 0.8, + "learning_rate": 3.922795243242734e-06, + "loss": 1.2261, + "step": 3819 + }, + { + "epoch": 0.8, + "learning_rate": 3.914693676972408e-06, + "loss": 1.2204, + "step": 3820 + }, + { + "epoch": 0.8, + "learning_rate": 3.906599577576027e-06, + "loss": 1.1952, + "step": 3821 + }, + { + "epoch": 0.8, + "learning_rate": 3.898512948810922e-06, + "loss": 1.2485, + "step": 3822 + }, + { + "epoch": 0.8, + "learning_rate": 3.890433794430934e-06, + "loss": 1.2584, + "step": 3823 + }, + { + "epoch": 0.8, + "learning_rate": 3.882362118186445e-06, + "loss": 1.2613, + "step": 3824 + }, + { + "epoch": 0.8, + "learning_rate": 3.87429792382437e-06, + "loss": 1.2003, + "step": 3825 + }, + { + "epoch": 0.8, + "learning_rate": 3.86624121508814e-06, + "loss": 1.246, + "step": 3826 + }, + { + "epoch": 0.8, + "learning_rate": 3.858191995717722e-06, + "loss": 1.2902, + "step": 3827 + }, + { + "epoch": 0.81, + "learning_rate": 3.850150269449597e-06, + "loss": 1.2454, + "step": 3828 + }, + { + "epoch": 0.81, + "learning_rate": 3.84211604001677e-06, + "loss": 1.2498, + "step": 3829 + }, + { + "epoch": 0.81, + "learning_rate": 3.834089311148774e-06, + "loss": 1.2363, + "step": 3830 + }, + { + "epoch": 0.81, + "learning_rate": 3.826070086571651e-06, + "loss": 1.2316, + "step": 3831 + }, + { + "epoch": 0.81, + "learning_rate": 3.818058370007956e-06, + "loss": 1.2111, + "step": 3832 + }, + { + "epoch": 0.81, + "learning_rate": 3.810054165176775e-06, + "loss": 1.2284, + "step": 3833 + }, + { + "epoch": 0.81, + "learning_rate": 3.802057475793688e-06, + "loss": 1.1595, + "step": 3834 + }, + { + "epoch": 0.81, + "learning_rate": 3.794068305570804e-06, + "loss": 1.2422, + "step": 3835 + }, + { + "epoch": 0.81, + "learning_rate": 3.78608665821673e-06, + "loss": 1.2094, + "step": 3836 + }, + { + "epoch": 0.81, + "learning_rate": 3.778112537436578e-06, + "loss": 1.2318, + "step": 3837 + }, + { + "epoch": 0.81, + "learning_rate": 3.7701459469319824e-06, + "loss": 1.268, + "step": 3838 + }, + { + "epoch": 0.81, + "learning_rate": 3.7621868904010585e-06, + "loss": 1.2058, + "step": 3839 + }, + { + "epoch": 0.81, + "learning_rate": 3.7542353715384462e-06, + "loss": 1.2071, + "step": 3840 + }, + { + "epoch": 0.81, + "learning_rate": 3.7462913940352797e-06, + "loss": 1.2856, + "step": 3841 + }, + { + "epoch": 0.81, + "learning_rate": 3.7383549615791826e-06, + "loss": 1.275, + "step": 3842 + }, + { + "epoch": 0.81, + "learning_rate": 3.7304260778542924e-06, + "loss": 1.2445, + "step": 3843 + }, + { + "epoch": 0.81, + "learning_rate": 3.722504746541229e-06, + "loss": 1.2283, + "step": 3844 + }, + { + "epoch": 0.81, + "learning_rate": 3.714590971317107e-06, + "loss": 1.2413, + "step": 3845 + }, + { + "epoch": 0.81, + "learning_rate": 3.706684755855545e-06, + "loss": 1.2882, + "step": 3846 + }, + { + "epoch": 0.81, + "learning_rate": 3.698786103826639e-06, + "loss": 1.2922, + "step": 3847 + }, + { + "epoch": 0.81, + "learning_rate": 3.690895018896987e-06, + "loss": 1.2429, + "step": 3848 + }, + { + "epoch": 0.81, + "learning_rate": 3.6830115047296633e-06, + "loss": 1.2325, + "step": 3849 + }, + { + "epoch": 0.81, + "learning_rate": 3.675135564984227e-06, + "loss": 1.1791, + "step": 3850 + }, + { + "epoch": 0.81, + "learning_rate": 3.6672672033167333e-06, + "loss": 1.2524, + "step": 3851 + }, + { + "epoch": 0.81, + "learning_rate": 3.6594064233797123e-06, + "loss": 1.2261, + "step": 3852 + }, + { + "epoch": 0.81, + "learning_rate": 3.6515532288221646e-06, + "loss": 1.2731, + "step": 3853 + }, + { + "epoch": 0.81, + "learning_rate": 3.643707623289592e-06, + "loss": 1.2579, + "step": 3854 + }, + { + "epoch": 0.81, + "learning_rate": 3.635869610423952e-06, + "loss": 1.2219, + "step": 3855 + }, + { + "epoch": 0.81, + "learning_rate": 3.628039193863695e-06, + "loss": 1.2137, + "step": 3856 + }, + { + "epoch": 0.81, + "learning_rate": 3.6202163772437326e-06, + "loss": 1.2421, + "step": 3857 + }, + { + "epoch": 0.81, + "learning_rate": 3.6124011641954473e-06, + "loss": 1.272, + "step": 3858 + }, + { + "epoch": 0.81, + "learning_rate": 3.6045935583467053e-06, + "loss": 1.2398, + "step": 3859 + }, + { + "epoch": 0.81, + "learning_rate": 3.5967935633218277e-06, + "loss": 1.2371, + "step": 3860 + }, + { + "epoch": 0.81, + "learning_rate": 3.589001182741616e-06, + "loss": 1.2357, + "step": 3861 + }, + { + "epoch": 0.81, + "learning_rate": 3.5812164202233236e-06, + "loss": 1.1327, + "step": 3862 + }, + { + "epoch": 0.81, + "learning_rate": 3.5734392793806704e-06, + "loss": 1.1843, + "step": 3863 + }, + { + "epoch": 0.81, + "learning_rate": 3.5656697638238447e-06, + "loss": 1.2136, + "step": 3864 + }, + { + "epoch": 0.81, + "learning_rate": 3.5579078771594988e-06, + "loss": 1.2392, + "step": 3865 + }, + { + "epoch": 0.81, + "learning_rate": 3.550153622990724e-06, + "loss": 1.2362, + "step": 3866 + }, + { + "epoch": 0.81, + "learning_rate": 3.542407004917092e-06, + "loss": 1.1976, + "step": 3867 + }, + { + "epoch": 0.81, + "learning_rate": 3.5346680265346113e-06, + "loss": 1.2348, + "step": 3868 + }, + { + "epoch": 0.81, + "learning_rate": 3.5269366914357585e-06, + "loss": 1.2678, + "step": 3869 + }, + { + "epoch": 0.81, + "learning_rate": 3.5192130032094517e-06, + "loss": 1.256, + "step": 3870 + }, + { + "epoch": 0.81, + "learning_rate": 3.511496965441057e-06, + "loss": 1.1927, + "step": 3871 + }, + { + "epoch": 0.81, + "learning_rate": 3.503788581712406e-06, + "loss": 1.2388, + "step": 3872 + }, + { + "epoch": 0.81, + "learning_rate": 3.4960878556017597e-06, + "loss": 1.2157, + "step": 3873 + }, + { + "epoch": 0.81, + "learning_rate": 3.488394790683829e-06, + "loss": 1.238, + "step": 3874 + }, + { + "epoch": 0.82, + "learning_rate": 3.480709390529777e-06, + "loss": 1.2533, + "step": 3875 + }, + { + "epoch": 0.82, + "learning_rate": 3.473031658707193e-06, + "loss": 1.1969, + "step": 3876 + }, + { + "epoch": 0.82, + "learning_rate": 3.465361598780128e-06, + "loss": 1.2303, + "step": 3877 + }, + { + "epoch": 0.82, + "learning_rate": 3.4576992143090517e-06, + "loss": 1.2805, + "step": 3878 + }, + { + "epoch": 0.82, + "learning_rate": 3.450044508850876e-06, + "loss": 1.26, + "step": 3879 + }, + { + "epoch": 0.82, + "learning_rate": 3.4423974859589594e-06, + "loss": 1.1821, + "step": 3880 + }, + { + "epoch": 0.82, + "learning_rate": 3.4347581491830796e-06, + "loss": 1.2238, + "step": 3881 + }, + { + "epoch": 0.82, + "learning_rate": 3.427126502069449e-06, + "loss": 1.2715, + "step": 3882 + }, + { + "epoch": 0.82, + "learning_rate": 3.4195025481607224e-06, + "loss": 1.2255, + "step": 3883 + }, + { + "epoch": 0.82, + "learning_rate": 3.411886290995965e-06, + "loss": 1.2507, + "step": 3884 + }, + { + "epoch": 0.82, + "learning_rate": 3.4042777341106903e-06, + "loss": 1.2484, + "step": 3885 + }, + { + "epoch": 0.82, + "learning_rate": 3.3966768810368132e-06, + "loss": 1.2795, + "step": 3886 + }, + { + "epoch": 0.82, + "learning_rate": 3.3890837353026964e-06, + "loss": 1.2657, + "step": 3887 + }, + { + "epoch": 0.82, + "learning_rate": 3.3814983004331014e-06, + "loss": 1.2332, + "step": 3888 + }, + { + "epoch": 0.82, + "learning_rate": 3.373920579949237e-06, + "loss": 1.2309, + "step": 3889 + }, + { + "epoch": 0.82, + "learning_rate": 3.3663505773687023e-06, + "loss": 1.2391, + "step": 3890 + }, + { + "epoch": 0.82, + "learning_rate": 3.3587882962055374e-06, + "loss": 1.2462, + "step": 3891 + }, + { + "epoch": 0.82, + "learning_rate": 3.3512337399701813e-06, + "loss": 1.2391, + "step": 3892 + }, + { + "epoch": 0.82, + "learning_rate": 3.3436869121695013e-06, + "loss": 1.2377, + "step": 3893 + }, + { + "epoch": 0.82, + "learning_rate": 3.3361478163067673e-06, + "loss": 1.2021, + "step": 3894 + }, + { + "epoch": 0.82, + "learning_rate": 3.328616455881657e-06, + "loss": 1.2061, + "step": 3895 + }, + { + "epoch": 0.82, + "learning_rate": 3.3210928343902716e-06, + "loss": 1.2055, + "step": 3896 + }, + { + "epoch": 0.82, + "learning_rate": 3.3135769553251017e-06, + "loss": 1.2479, + "step": 3897 + }, + { + "epoch": 0.82, + "learning_rate": 3.3060688221750637e-06, + "loss": 1.2068, + "step": 3898 + }, + { + "epoch": 0.82, + "learning_rate": 3.2985684384254648e-06, + "loss": 1.2059, + "step": 3899 + }, + { + "epoch": 0.82, + "learning_rate": 3.2910758075580085e-06, + "loss": 1.2377, + "step": 3900 + }, + { + "epoch": 0.82, + "learning_rate": 3.283590933050822e-06, + "loss": 1.2323, + "step": 3901 + }, + { + "epoch": 0.82, + "learning_rate": 3.2761138183784126e-06, + "loss": 1.2227, + "step": 3902 + }, + { + "epoch": 0.82, + "learning_rate": 3.2686444670116878e-06, + "loss": 1.1798, + "step": 3903 + }, + { + "epoch": 0.82, + "learning_rate": 3.261182882417966e-06, + "loss": 1.2491, + "step": 3904 + }, + { + "epoch": 0.82, + "learning_rate": 3.253729068060938e-06, + "loss": 1.2497, + "step": 3905 + }, + { + "epoch": 0.82, + "learning_rate": 3.2462830274007073e-06, + "loss": 1.2374, + "step": 3906 + }, + { + "epoch": 0.82, + "learning_rate": 3.2388447638937583e-06, + "loss": 1.237, + "step": 3907 + }, + { + "epoch": 0.82, + "learning_rate": 3.2314142809929617e-06, + "loss": 1.2184, + "step": 3908 + }, + { + "epoch": 0.82, + "learning_rate": 3.223991582147592e-06, + "loss": 1.2358, + "step": 3909 + }, + { + "epoch": 0.82, + "learning_rate": 3.216576670803291e-06, + "loss": 1.1994, + "step": 3910 + }, + { + "epoch": 0.82, + "learning_rate": 3.2091695504021047e-06, + "loss": 1.2259, + "step": 3911 + }, + { + "epoch": 0.82, + "learning_rate": 3.2017702243824434e-06, + "loss": 1.2491, + "step": 3912 + }, + { + "epoch": 0.82, + "learning_rate": 3.1943786961791166e-06, + "loss": 1.25, + "step": 3913 + }, + { + "epoch": 0.82, + "learning_rate": 3.1869949692232982e-06, + "loss": 1.1986, + "step": 3914 + }, + { + "epoch": 0.82, + "learning_rate": 3.179619046942557e-06, + "loss": 1.2398, + "step": 3915 + }, + { + "epoch": 0.82, + "learning_rate": 3.172250932760823e-06, + "loss": 1.1818, + "step": 3916 + }, + { + "epoch": 0.82, + "learning_rate": 3.164890630098416e-06, + "loss": 1.2342, + "step": 3917 + }, + { + "epoch": 0.82, + "learning_rate": 3.1575381423720142e-06, + "loss": 1.2302, + "step": 3918 + }, + { + "epoch": 0.82, + "learning_rate": 3.150193472994687e-06, + "loss": 1.2234, + "step": 3919 + }, + { + "epoch": 0.82, + "learning_rate": 3.142856625375856e-06, + "loss": 1.2781, + "step": 3920 + }, + { + "epoch": 0.82, + "learning_rate": 3.13552760292132e-06, + "loss": 1.2061, + "step": 3921 + }, + { + "epoch": 0.82, + "learning_rate": 3.1282064090332522e-06, + "loss": 1.1835, + "step": 3922 + }, + { + "epoch": 0.83, + "learning_rate": 3.1208930471101786e-06, + "loss": 1.219, + "step": 3923 + }, + { + "epoch": 0.83, + "learning_rate": 3.1135875205469946e-06, + "loss": 1.2042, + "step": 3924 + }, + { + "epoch": 0.83, + "learning_rate": 3.1062898327349656e-06, + "loss": 1.2249, + "step": 3925 + }, + { + "epoch": 0.83, + "learning_rate": 3.098999987061706e-06, + "loss": 1.2185, + "step": 3926 + }, + { + "epoch": 0.83, + "learning_rate": 3.0917179869112023e-06, + "loss": 1.2449, + "step": 3927 + }, + { + "epoch": 0.83, + "learning_rate": 3.084443835663791e-06, + "loss": 1.1974, + "step": 3928 + }, + { + "epoch": 0.83, + "learning_rate": 3.077177536696159e-06, + "loss": 1.259, + "step": 3929 + }, + { + "epoch": 0.83, + "learning_rate": 3.0699190933813683e-06, + "loss": 1.2247, + "step": 3930 + }, + { + "epoch": 0.83, + "learning_rate": 3.0626685090888177e-06, + "loss": 1.2188, + "step": 3931 + }, + { + "epoch": 0.83, + "learning_rate": 3.0554257871842543e-06, + "loss": 1.2518, + "step": 3932 + }, + { + "epoch": 0.83, + "learning_rate": 3.0481909310297954e-06, + "loss": 1.2321, + "step": 3933 + }, + { + "epoch": 0.83, + "learning_rate": 3.0409639439838833e-06, + "loss": 1.2839, + "step": 3934 + }, + { + "epoch": 0.83, + "learning_rate": 3.0337448294013307e-06, + "loss": 1.2261, + "step": 3935 + }, + { + "epoch": 0.83, + "learning_rate": 3.0265335906332717e-06, + "loss": 1.1707, + "step": 3936 + }, + { + "epoch": 0.83, + "learning_rate": 3.019330231027209e-06, + "loss": 1.2104, + "step": 3937 + }, + { + "epoch": 0.83, + "learning_rate": 3.012134753926965e-06, + "loss": 1.2262, + "step": 3938 + }, + { + "epoch": 0.83, + "learning_rate": 3.0049471626727246e-06, + "loss": 1.2489, + "step": 3939 + }, + { + "epoch": 0.83, + "learning_rate": 2.997767460600991e-06, + "loss": 1.2649, + "step": 3940 + }, + { + "epoch": 0.83, + "learning_rate": 2.990595651044621e-06, + "loss": 1.1955, + "step": 3941 + }, + { + "epoch": 0.83, + "learning_rate": 2.9834317373327983e-06, + "loss": 1.2643, + "step": 3942 + }, + { + "epoch": 0.83, + "learning_rate": 2.976275722791051e-06, + "loss": 1.2107, + "step": 3943 + }, + { + "epoch": 0.83, + "learning_rate": 2.9691276107412293e-06, + "loss": 1.2201, + "step": 3944 + }, + { + "epoch": 0.83, + "learning_rate": 2.961987404501516e-06, + "loss": 1.2721, + "step": 3945 + }, + { + "epoch": 0.83, + "learning_rate": 2.9548551073864386e-06, + "loss": 1.2075, + "step": 3946 + }, + { + "epoch": 0.83, + "learning_rate": 2.947730722706832e-06, + "loss": 1.236, + "step": 3947 + }, + { + "epoch": 0.83, + "learning_rate": 2.94061425376988e-06, + "loss": 1.2498, + "step": 3948 + }, + { + "epoch": 0.83, + "learning_rate": 2.9335057038790715e-06, + "loss": 1.1947, + "step": 3949 + }, + { + "epoch": 0.83, + "learning_rate": 2.9264050763342267e-06, + "loss": 1.2651, + "step": 3950 + }, + { + "epoch": 0.83, + "learning_rate": 2.9193123744315e-06, + "loss": 1.2381, + "step": 3951 + }, + { + "epoch": 0.83, + "learning_rate": 2.912227601463351e-06, + "loss": 1.2728, + "step": 3952 + }, + { + "epoch": 0.83, + "learning_rate": 2.9051507607185603e-06, + "loss": 1.2692, + "step": 3953 + }, + { + "epoch": 0.83, + "learning_rate": 2.8980818554822376e-06, + "loss": 1.2416, + "step": 3954 + }, + { + "epoch": 0.83, + "learning_rate": 2.8910208890357916e-06, + "loss": 1.2354, + "step": 3955 + }, + { + "epoch": 0.83, + "learning_rate": 2.883967864656969e-06, + "loss": 1.2996, + "step": 3956 + }, + { + "epoch": 0.83, + "learning_rate": 2.876922785619809e-06, + "loss": 1.2389, + "step": 3957 + }, + { + "epoch": 0.83, + "learning_rate": 2.8698856551946664e-06, + "loss": 1.1833, + "step": 3958 + }, + { + "epoch": 0.83, + "learning_rate": 2.8628564766482193e-06, + "loss": 1.2702, + "step": 3959 + }, + { + "epoch": 0.83, + "learning_rate": 2.855835253243433e-06, + "loss": 1.2147, + "step": 3960 + }, + { + "epoch": 0.83, + "learning_rate": 2.848821988239605e-06, + "loss": 1.242, + "step": 3961 + }, + { + "epoch": 0.83, + "learning_rate": 2.8418166848923158e-06, + "loss": 1.1824, + "step": 3962 + }, + { + "epoch": 0.83, + "learning_rate": 2.834819346453468e-06, + "loss": 1.2445, + "step": 3963 + }, + { + "epoch": 0.83, + "learning_rate": 2.827829976171248e-06, + "loss": 1.2354, + "step": 3964 + }, + { + "epoch": 0.83, + "learning_rate": 2.820848577290165e-06, + "loss": 1.2483, + "step": 3965 + }, + { + "epoch": 0.83, + "learning_rate": 2.8138751530510065e-06, + "loss": 1.2257, + "step": 3966 + }, + { + "epoch": 0.83, + "learning_rate": 2.806909706690881e-06, + "loss": 1.2184, + "step": 3967 + }, + { + "epoch": 0.83, + "learning_rate": 2.799952241443167e-06, + "loss": 1.3169, + "step": 3968 + }, + { + "epoch": 0.83, + "learning_rate": 2.7930027605375644e-06, + "loss": 1.2348, + "step": 3969 + }, + { + "epoch": 0.84, + "learning_rate": 2.7860612672000485e-06, + "loss": 1.2304, + "step": 3970 + }, + { + "epoch": 0.84, + "learning_rate": 2.7791277646528893e-06, + "loss": 1.2483, + "step": 3971 + }, + { + "epoch": 0.84, + "learning_rate": 2.77220225611466e-06, + "loss": 1.2624, + "step": 3972 + }, + { + "epoch": 0.84, + "learning_rate": 2.7652847448002074e-06, + "loss": 1.224, + "step": 3973 + }, + { + "epoch": 0.84, + "learning_rate": 2.7583752339206714e-06, + "loss": 1.2417, + "step": 3974 + }, + { + "epoch": 0.84, + "learning_rate": 2.7514737266834845e-06, + "loss": 1.1722, + "step": 3975 + }, + { + "epoch": 0.84, + "learning_rate": 2.7445802262923505e-06, + "loss": 1.1919, + "step": 3976 + }, + { + "epoch": 0.84, + "learning_rate": 2.737694735947276e-06, + "loss": 1.1951, + "step": 3977 + }, + { + "epoch": 0.84, + "learning_rate": 2.730817258844529e-06, + "loss": 1.2739, + "step": 3978 + }, + { + "epoch": 0.84, + "learning_rate": 2.723947798176665e-06, + "loss": 1.1915, + "step": 3979 + }, + { + "epoch": 0.84, + "learning_rate": 2.7170863571325257e-06, + "loss": 1.2676, + "step": 3980 + }, + { + "epoch": 0.84, + "learning_rate": 2.7102329388972215e-06, + "loss": 1.2477, + "step": 3981 + }, + { + "epoch": 0.84, + "learning_rate": 2.7033875466521363e-06, + "loss": 1.2373, + "step": 3982 + }, + { + "epoch": 0.84, + "learning_rate": 2.696550183574942e-06, + "loss": 1.213, + "step": 3983 + }, + { + "epoch": 0.84, + "learning_rate": 2.6897208528395656e-06, + "loss": 1.1821, + "step": 3984 + }, + { + "epoch": 0.84, + "learning_rate": 2.682899557616223e-06, + "loss": 1.2195, + "step": 3985 + }, + { + "epoch": 0.84, + "learning_rate": 2.676086301071381e-06, + "loss": 1.2169, + "step": 3986 + }, + { + "epoch": 0.84, + "learning_rate": 2.66928108636779e-06, + "loss": 1.2245, + "step": 3987 + }, + { + "epoch": 0.84, + "learning_rate": 2.662483916664467e-06, + "loss": 1.2297, + "step": 3988 + }, + { + "epoch": 0.84, + "learning_rate": 2.6556947951166836e-06, + "loss": 1.1888, + "step": 3989 + }, + { + "epoch": 0.84, + "learning_rate": 2.648913724875981e-06, + "loss": 1.2278, + "step": 3990 + }, + { + "epoch": 0.84, + "learning_rate": 2.6421407090901707e-06, + "loss": 1.2259, + "step": 3991 + }, + { + "epoch": 0.84, + "learning_rate": 2.635375750903306e-06, + "loss": 1.2322, + "step": 3992 + }, + { + "epoch": 0.84, + "learning_rate": 2.628618853455727e-06, + "loss": 1.2886, + "step": 3993 + }, + { + "epoch": 0.84, + "learning_rate": 2.621870019884005e-06, + "loss": 1.2436, + "step": 3994 + }, + { + "epoch": 0.84, + "learning_rate": 2.6151292533209826e-06, + "loss": 1.2344, + "step": 3995 + }, + { + "epoch": 0.84, + "learning_rate": 2.6083965568957603e-06, + "loss": 1.2718, + "step": 3996 + }, + { + "epoch": 0.84, + "learning_rate": 2.601671933733678e-06, + "loss": 1.2734, + "step": 3997 + }, + { + "epoch": 0.84, + "learning_rate": 2.594955386956346e-06, + "loss": 1.2458, + "step": 3998 + }, + { + "epoch": 0.84, + "learning_rate": 2.588246919681614e-06, + "loss": 1.2649, + "step": 3999 + }, + { + "epoch": 0.84, + "learning_rate": 2.5815465350235756e-06, + "loss": 1.2268, + "step": 4000 + }, + { + "epoch": 0.84, + "learning_rate": 2.5748542360925944e-06, + "loss": 1.2762, + "step": 4001 + }, + { + "epoch": 0.84, + "learning_rate": 2.568170025995258e-06, + "loss": 1.2597, + "step": 4002 + }, + { + "epoch": 0.84, + "learning_rate": 2.561493907834405e-06, + "loss": 1.2471, + "step": 4003 + }, + { + "epoch": 0.84, + "learning_rate": 2.5548258847091266e-06, + "loss": 1.2999, + "step": 4004 + }, + { + "epoch": 0.84, + "learning_rate": 2.548165959714748e-06, + "loss": 1.1962, + "step": 4005 + }, + { + "epoch": 0.84, + "learning_rate": 2.54151413594284e-06, + "loss": 1.2221, + "step": 4006 + }, + { + "epoch": 0.84, + "learning_rate": 2.534870416481208e-06, + "loss": 1.2091, + "step": 4007 + }, + { + "epoch": 0.84, + "learning_rate": 2.5282348044138915e-06, + "loss": 1.1968, + "step": 4008 + }, + { + "epoch": 0.84, + "learning_rate": 2.521607302821183e-06, + "loss": 1.2388, + "step": 4009 + }, + { + "epoch": 0.84, + "learning_rate": 2.514987914779592e-06, + "loss": 1.1939, + "step": 4010 + }, + { + "epoch": 0.84, + "learning_rate": 2.5083766433618695e-06, + "loss": 1.2559, + "step": 4011 + }, + { + "epoch": 0.84, + "learning_rate": 2.5017734916370073e-06, + "loss": 1.2481, + "step": 4012 + }, + { + "epoch": 0.84, + "learning_rate": 2.495178462670207e-06, + "loss": 1.2342, + "step": 4013 + }, + { + "epoch": 0.84, + "learning_rate": 2.4885915595229215e-06, + "loss": 1.2446, + "step": 4014 + }, + { + "epoch": 0.84, + "learning_rate": 2.4820127852528163e-06, + "loss": 1.2516, + "step": 4015 + }, + { + "epoch": 0.84, + "learning_rate": 2.4754421429137887e-06, + "loss": 1.138, + "step": 4016 + }, + { + "epoch": 0.84, + "learning_rate": 2.468879635555965e-06, + "loss": 1.231, + "step": 4017 + }, + { + "epoch": 0.85, + "learning_rate": 2.462325266225687e-06, + "loss": 1.1968, + "step": 4018 + }, + { + "epoch": 0.85, + "learning_rate": 2.455779037965529e-06, + "loss": 1.2174, + "step": 4019 + }, + { + "epoch": 0.85, + "learning_rate": 2.4492409538142803e-06, + "loss": 1.263, + "step": 4020 + }, + { + "epoch": 0.85, + "learning_rate": 2.44271101680694e-06, + "loss": 1.2434, + "step": 4021 + }, + { + "epoch": 0.85, + "learning_rate": 2.436189229974748e-06, + "loss": 1.2122, + "step": 4022 + }, + { + "epoch": 0.85, + "learning_rate": 2.4296755963451424e-06, + "loss": 1.2643, + "step": 4023 + }, + { + "epoch": 0.85, + "learning_rate": 2.423170118941778e-06, + "loss": 1.2076, + "step": 4024 + }, + { + "epoch": 0.85, + "learning_rate": 2.4166728007845364e-06, + "loss": 1.212, + "step": 4025 + }, + { + "epoch": 0.85, + "learning_rate": 2.4101836448894924e-06, + "loss": 1.2976, + "step": 4026 + }, + { + "epoch": 0.85, + "learning_rate": 2.4037026542689555e-06, + "loss": 1.2679, + "step": 4027 + }, + { + "epoch": 0.85, + "learning_rate": 2.3972298319314224e-06, + "loss": 1.2742, + "step": 4028 + }, + { + "epoch": 0.85, + "learning_rate": 2.3907651808816067e-06, + "loss": 1.2112, + "step": 4029 + }, + { + "epoch": 0.85, + "learning_rate": 2.384308704120435e-06, + "loss": 1.2603, + "step": 4030 + }, + { + "epoch": 0.85, + "learning_rate": 2.3778604046450313e-06, + "loss": 1.2175, + "step": 4031 + }, + { + "epoch": 0.85, + "learning_rate": 2.371420285448722e-06, + "loss": 1.1484, + "step": 4032 + }, + { + "epoch": 0.85, + "learning_rate": 2.364988349521049e-06, + "loss": 1.2499, + "step": 4033 + }, + { + "epoch": 0.85, + "learning_rate": 2.358564599847737e-06, + "loss": 1.2726, + "step": 4034 + }, + { + "epoch": 0.85, + "learning_rate": 2.352149039410727e-06, + "loss": 1.207, + "step": 4035 + }, + { + "epoch": 0.85, + "learning_rate": 2.345741671188153e-06, + "loss": 1.231, + "step": 4036 + }, + { + "epoch": 0.85, + "learning_rate": 2.33934249815434e-06, + "loss": 1.2326, + "step": 4037 + }, + { + "epoch": 0.85, + "learning_rate": 2.3329515232798207e-06, + "loss": 1.259, + "step": 4038 + }, + { + "epoch": 0.85, + "learning_rate": 2.3265687495313106e-06, + "loss": 1.2424, + "step": 4039 + }, + { + "epoch": 0.85, + "learning_rate": 2.3201941798717176e-06, + "loss": 1.2398, + "step": 4040 + }, + { + "epoch": 0.85, + "learning_rate": 2.313827817260159e-06, + "loss": 1.2136, + "step": 4041 + }, + { + "epoch": 0.85, + "learning_rate": 2.307469664651918e-06, + "loss": 1.1884, + "step": 4042 + }, + { + "epoch": 0.85, + "learning_rate": 2.3011197249984886e-06, + "loss": 1.2505, + "step": 4043 + }, + { + "epoch": 0.85, + "learning_rate": 2.2947780012475396e-06, + "loss": 1.2538, + "step": 4044 + }, + { + "epoch": 0.85, + "learning_rate": 2.2884444963429188e-06, + "loss": 1.1803, + "step": 4045 + }, + { + "epoch": 0.85, + "learning_rate": 2.282119213224683e-06, + "loss": 1.1689, + "step": 4046 + }, + { + "epoch": 0.85, + "learning_rate": 2.2758021548290478e-06, + "loss": 1.2234, + "step": 4047 + }, + { + "epoch": 0.85, + "learning_rate": 2.2694933240884277e-06, + "loss": 1.2584, + "step": 4048 + }, + { + "epoch": 0.85, + "learning_rate": 2.263192723931409e-06, + "loss": 1.1257, + "step": 4049 + }, + { + "epoch": 0.85, + "learning_rate": 2.2569003572827543e-06, + "loss": 1.2695, + "step": 4050 + }, + { + "epoch": 0.85, + "learning_rate": 2.250616227063418e-06, + "loss": 1.2412, + "step": 4051 + }, + { + "epoch": 0.85, + "learning_rate": 2.244340336190518e-06, + "loss": 1.2503, + "step": 4052 + }, + { + "epoch": 0.85, + "learning_rate": 2.2380726875773507e-06, + "loss": 1.2265, + "step": 4053 + }, + { + "epoch": 0.85, + "learning_rate": 2.2318132841333906e-06, + "loss": 1.2291, + "step": 4054 + }, + { + "epoch": 0.85, + "learning_rate": 2.2255621287642805e-06, + "loss": 1.2869, + "step": 4055 + }, + { + "epoch": 0.85, + "learning_rate": 2.2193192243718385e-06, + "loss": 1.2072, + "step": 4056 + }, + { + "epoch": 0.85, + "learning_rate": 2.2130845738540475e-06, + "loss": 1.2413, + "step": 4057 + }, + { + "epoch": 0.85, + "learning_rate": 2.2068581801050557e-06, + "loss": 1.2431, + "step": 4058 + }, + { + "epoch": 0.85, + "learning_rate": 2.2006400460151923e-06, + "loss": 1.2413, + "step": 4059 + }, + { + "epoch": 0.85, + "learning_rate": 2.1944301744709428e-06, + "loss": 1.2783, + "step": 4060 + }, + { + "epoch": 0.85, + "learning_rate": 2.1882285683549555e-06, + "loss": 1.1931, + "step": 4061 + }, + { + "epoch": 0.85, + "learning_rate": 2.1820352305460492e-06, + "loss": 1.2397, + "step": 4062 + }, + { + "epoch": 0.85, + "learning_rate": 2.1758501639191908e-06, + "loss": 1.1991, + "step": 4063 + }, + { + "epoch": 0.85, + "learning_rate": 2.169673371345531e-06, + "loss": 1.2471, + "step": 4064 + }, + { + "epoch": 0.86, + "learning_rate": 2.1635048556923555e-06, + "loss": 1.2106, + "step": 4065 + }, + { + "epoch": 0.86, + "learning_rate": 2.1573446198231185e-06, + "loss": 1.1624, + "step": 4066 + }, + { + "epoch": 0.86, + "learning_rate": 2.1511926665974324e-06, + "loss": 1.2528, + "step": 4067 + }, + { + "epoch": 0.86, + "learning_rate": 2.1450489988710644e-06, + "loss": 1.1853, + "step": 4068 + }, + { + "epoch": 0.86, + "learning_rate": 2.138913619495928e-06, + "loss": 1.1842, + "step": 4069 + }, + { + "epoch": 0.86, + "learning_rate": 2.1327865313201015e-06, + "loss": 1.2136, + "step": 4070 + }, + { + "epoch": 0.86, + "learning_rate": 2.1266677371877996e-06, + "loss": 1.1817, + "step": 4071 + }, + { + "epoch": 0.86, + "learning_rate": 2.120557239939405e-06, + "loss": 1.1964, + "step": 4072 + }, + { + "epoch": 0.86, + "learning_rate": 2.114455042411432e-06, + "loss": 1.2304, + "step": 4073 + }, + { + "epoch": 0.86, + "learning_rate": 2.108361147436546e-06, + "loss": 1.2244, + "step": 4074 + }, + { + "epoch": 0.86, + "learning_rate": 2.1022755578435715e-06, + "loss": 1.2338, + "step": 4075 + }, + { + "epoch": 0.86, + "learning_rate": 2.0961982764574597e-06, + "loss": 1.1968, + "step": 4076 + }, + { + "epoch": 0.86, + "learning_rate": 2.0901293060993154e-06, + "loss": 1.2226, + "step": 4077 + }, + { + "epoch": 0.86, + "learning_rate": 2.0840686495863837e-06, + "loss": 1.2201, + "step": 4078 + }, + { + "epoch": 0.86, + "learning_rate": 2.078016309732047e-06, + "loss": 1.2608, + "step": 4079 + }, + { + "epoch": 0.86, + "learning_rate": 2.0719722893458317e-06, + "loss": 1.2443, + "step": 4080 + }, + { + "epoch": 0.86, + "learning_rate": 2.0659365912333972e-06, + "loss": 1.202, + "step": 4081 + }, + { + "epoch": 0.86, + "learning_rate": 2.0599092181965474e-06, + "loss": 1.1398, + "step": 4082 + }, + { + "epoch": 0.86, + "learning_rate": 2.0538901730332128e-06, + "loss": 1.2773, + "step": 4083 + }, + { + "epoch": 0.86, + "learning_rate": 2.047879458537465e-06, + "loss": 1.2794, + "step": 4084 + }, + { + "epoch": 0.86, + "learning_rate": 2.0418770774995034e-06, + "loss": 1.2047, + "step": 4085 + }, + { + "epoch": 0.86, + "learning_rate": 2.0358830327056633e-06, + "loss": 1.2397, + "step": 4086 + }, + { + "epoch": 0.86, + "learning_rate": 2.0298973269384037e-06, + "loss": 1.2516, + "step": 4087 + }, + { + "epoch": 0.86, + "learning_rate": 2.023919962976324e-06, + "loss": 1.2235, + "step": 4088 + }, + { + "epoch": 0.86, + "learning_rate": 2.0179509435941403e-06, + "loss": 1.1876, + "step": 4089 + }, + { + "epoch": 0.86, + "learning_rate": 2.011990271562696e-06, + "loss": 1.2351, + "step": 4090 + }, + { + "epoch": 0.86, + "learning_rate": 2.006037949648971e-06, + "loss": 1.2192, + "step": 4091 + }, + { + "epoch": 0.86, + "learning_rate": 2.000093980616051e-06, + "loss": 1.2409, + "step": 4092 + }, + { + "epoch": 0.86, + "learning_rate": 1.9941583672231624e-06, + "loss": 1.2603, + "step": 4093 + }, + { + "epoch": 0.86, + "learning_rate": 1.9882311122256425e-06, + "loss": 1.2002, + "step": 4094 + }, + { + "epoch": 0.86, + "learning_rate": 1.9823122183749443e-06, + "loss": 1.2564, + "step": 4095 + }, + { + "epoch": 0.86, + "learning_rate": 1.9764016884186545e-06, + "loss": 1.1684, + "step": 4096 + }, + { + "epoch": 0.86, + "learning_rate": 1.9704995251004622e-06, + "loss": 1.2472, + "step": 4097 + }, + { + "epoch": 0.86, + "learning_rate": 1.9646057311601853e-06, + "loss": 1.2206, + "step": 4098 + }, + { + "epoch": 0.86, + "learning_rate": 1.958720309333746e-06, + "loss": 1.1955, + "step": 4099 + }, + { + "epoch": 0.86, + "learning_rate": 1.952843262353181e-06, + "loss": 1.1952, + "step": 4100 + }, + { + "epoch": 0.86, + "learning_rate": 1.946974592946651e-06, + "loss": 1.212, + "step": 4101 + }, + { + "epoch": 0.86, + "learning_rate": 1.9411143038384163e-06, + "loss": 1.1871, + "step": 4102 + }, + { + "epoch": 0.86, + "learning_rate": 1.935262397748845e-06, + "loss": 1.2422, + "step": 4103 + }, + { + "epoch": 0.86, + "learning_rate": 1.929418877394429e-06, + "loss": 1.1622, + "step": 4104 + }, + { + "epoch": 0.86, + "learning_rate": 1.923583745487747e-06, + "loss": 1.2082, + "step": 4105 + }, + { + "epoch": 0.86, + "learning_rate": 1.917757004737506e-06, + "loss": 1.2459, + "step": 4106 + }, + { + "epoch": 0.86, + "learning_rate": 1.9119386578484934e-06, + "loss": 1.2342, + "step": 4107 + }, + { + "epoch": 0.86, + "learning_rate": 1.906128707521624e-06, + "loss": 1.2289, + "step": 4108 + }, + { + "epoch": 0.86, + "learning_rate": 1.900327156453896e-06, + "loss": 1.2199, + "step": 4109 + }, + { + "epoch": 0.86, + "learning_rate": 1.894534007338422e-06, + "loss": 1.221, + "step": 4110 + }, + { + "epoch": 0.86, + "learning_rate": 1.8887492628644022e-06, + "loss": 1.1752, + "step": 4111 + }, + { + "epoch": 0.86, + "learning_rate": 1.8829729257171503e-06, + "loss": 1.2397, + "step": 4112 + }, + { + "epoch": 0.87, + "learning_rate": 1.8772049985780616e-06, + "loss": 1.216, + "step": 4113 + }, + { + "epoch": 0.87, + "learning_rate": 1.871445484124641e-06, + "loss": 1.2169, + "step": 4114 + }, + { + "epoch": 0.87, + "learning_rate": 1.8656943850304765e-06, + "loss": 1.2489, + "step": 4115 + }, + { + "epoch": 0.87, + "learning_rate": 1.8599517039652548e-06, + "loss": 1.2228, + "step": 4116 + }, + { + "epoch": 0.87, + "learning_rate": 1.8542174435947614e-06, + "loss": 1.2122, + "step": 4117 + }, + { + "epoch": 0.87, + "learning_rate": 1.8484916065808622e-06, + "loss": 1.2304, + "step": 4118 + }, + { + "epoch": 0.87, + "learning_rate": 1.8427741955815138e-06, + "loss": 1.2397, + "step": 4119 + }, + { + "epoch": 0.87, + "learning_rate": 1.8370652132507705e-06, + "loss": 1.1906, + "step": 4120 + }, + { + "epoch": 0.87, + "learning_rate": 1.8313646622387639e-06, + "loss": 1.2633, + "step": 4121 + }, + { + "epoch": 0.87, + "learning_rate": 1.8256725451917233e-06, + "loss": 1.2508, + "step": 4122 + }, + { + "epoch": 0.87, + "learning_rate": 1.8199888647519537e-06, + "loss": 1.1996, + "step": 4123 + }, + { + "epoch": 0.87, + "learning_rate": 1.8143136235578374e-06, + "loss": 1.2186, + "step": 4124 + }, + { + "epoch": 0.87, + "learning_rate": 1.8086468242438582e-06, + "loss": 1.1936, + "step": 4125 + }, + { + "epoch": 0.87, + "learning_rate": 1.8029884694405631e-06, + "loss": 1.3133, + "step": 4126 + }, + { + "epoch": 0.87, + "learning_rate": 1.7973385617745953e-06, + "loss": 1.1846, + "step": 4127 + }, + { + "epoch": 0.87, + "learning_rate": 1.7916971038686614e-06, + "loss": 1.2062, + "step": 4128 + }, + { + "epoch": 0.87, + "learning_rate": 1.7860640983415533e-06, + "loss": 1.2634, + "step": 4129 + }, + { + "epoch": 0.87, + "learning_rate": 1.7804395478081416e-06, + "loss": 1.2399, + "step": 4130 + }, + { + "epoch": 0.87, + "learning_rate": 1.774823454879362e-06, + "loss": 1.2037, + "step": 4131 + }, + { + "epoch": 0.87, + "learning_rate": 1.7692158221622379e-06, + "loss": 1.1931, + "step": 4132 + }, + { + "epoch": 0.87, + "learning_rate": 1.763616652259854e-06, + "loss": 1.2231, + "step": 4133 + }, + { + "epoch": 0.87, + "learning_rate": 1.758025947771378e-06, + "loss": 1.2224, + "step": 4134 + }, + { + "epoch": 0.87, + "learning_rate": 1.752443711292029e-06, + "loss": 1.1991, + "step": 4135 + }, + { + "epoch": 0.87, + "learning_rate": 1.7468699454131211e-06, + "loss": 1.2411, + "step": 4136 + }, + { + "epoch": 0.87, + "learning_rate": 1.741304652722009e-06, + "loss": 1.2054, + "step": 4137 + }, + { + "epoch": 0.87, + "learning_rate": 1.7357478358021374e-06, + "loss": 1.2458, + "step": 4138 + }, + { + "epoch": 0.87, + "learning_rate": 1.7301994972330028e-06, + "loss": 1.2315, + "step": 4139 + }, + { + "epoch": 0.87, + "learning_rate": 1.724659639590167e-06, + "loss": 1.2182, + "step": 4140 + }, + { + "epoch": 0.87, + "learning_rate": 1.7191282654452646e-06, + "loss": 1.1663, + "step": 4141 + }, + { + "epoch": 0.87, + "learning_rate": 1.7136053773659766e-06, + "loss": 1.2193, + "step": 4142 + }, + { + "epoch": 0.87, + "learning_rate": 1.7080909779160615e-06, + "loss": 1.269, + "step": 4143 + }, + { + "epoch": 0.87, + "learning_rate": 1.7025850696553248e-06, + "loss": 1.2466, + "step": 4144 + }, + { + "epoch": 0.87, + "learning_rate": 1.6970876551396309e-06, + "loss": 1.2437, + "step": 4145 + }, + { + "epoch": 0.87, + "learning_rate": 1.6915987369209142e-06, + "loss": 1.2028, + "step": 4146 + }, + { + "epoch": 0.87, + "learning_rate": 1.6861183175471495e-06, + "loss": 1.2666, + "step": 4147 + }, + { + "epoch": 0.87, + "learning_rate": 1.6806463995623735e-06, + "loss": 1.2602, + "step": 4148 + }, + { + "epoch": 0.87, + "learning_rate": 1.6751829855066804e-06, + "loss": 1.1981, + "step": 4149 + }, + { + "epoch": 0.87, + "learning_rate": 1.669728077916206e-06, + "loss": 1.2195, + "step": 4150 + }, + { + "epoch": 0.87, + "learning_rate": 1.6642816793231499e-06, + "loss": 1.2094, + "step": 4151 + }, + { + "epoch": 0.87, + "learning_rate": 1.6588437922557533e-06, + "loss": 1.2068, + "step": 4152 + }, + { + "epoch": 0.87, + "learning_rate": 1.6534144192383038e-06, + "loss": 1.2506, + "step": 4153 + }, + { + "epoch": 0.87, + "learning_rate": 1.6479935627911481e-06, + "loss": 1.2311, + "step": 4154 + }, + { + "epoch": 0.87, + "learning_rate": 1.6425812254306707e-06, + "loss": 1.2384, + "step": 4155 + }, + { + "epoch": 0.87, + "learning_rate": 1.637177409669304e-06, + "loss": 1.2, + "step": 4156 + }, + { + "epoch": 0.87, + "learning_rate": 1.6317821180155214e-06, + "loss": 1.2337, + "step": 4157 + }, + { + "epoch": 0.87, + "learning_rate": 1.6263953529738464e-06, + "loss": 1.2668, + "step": 4158 + }, + { + "epoch": 0.87, + "learning_rate": 1.621017117044843e-06, + "loss": 1.2193, + "step": 4159 + }, + { + "epoch": 0.88, + "learning_rate": 1.6156474127251077e-06, + "loss": 1.2489, + "step": 4160 + }, + { + "epoch": 0.88, + "learning_rate": 1.6102862425072818e-06, + "loss": 1.2418, + "step": 4161 + }, + { + "epoch": 0.88, + "learning_rate": 1.6049336088800505e-06, + "loss": 1.215, + "step": 4162 + }, + { + "epoch": 0.88, + "learning_rate": 1.5995895143281236e-06, + "loss": 1.2294, + "step": 4163 + }, + { + "epoch": 0.88, + "learning_rate": 1.5942539613322638e-06, + "loss": 1.1854, + "step": 4164 + }, + { + "epoch": 0.88, + "learning_rate": 1.5889269523692541e-06, + "loss": 1.205, + "step": 4165 + }, + { + "epoch": 0.88, + "learning_rate": 1.5836084899119165e-06, + "loss": 1.1918, + "step": 4166 + }, + { + "epoch": 0.88, + "learning_rate": 1.5782985764291091e-06, + "loss": 1.2247, + "step": 4167 + }, + { + "epoch": 0.88, + "learning_rate": 1.5729972143857164e-06, + "loss": 1.2258, + "step": 4168 + }, + { + "epoch": 0.88, + "learning_rate": 1.567704406242654e-06, + "loss": 1.1779, + "step": 4169 + }, + { + "epoch": 0.88, + "learning_rate": 1.5624201544568717e-06, + "loss": 1.2531, + "step": 4170 + }, + { + "epoch": 0.88, + "learning_rate": 1.557144461481337e-06, + "loss": 1.1995, + "step": 4171 + }, + { + "epoch": 0.88, + "learning_rate": 1.5518773297650613e-06, + "loss": 1.2137, + "step": 4172 + }, + { + "epoch": 0.88, + "learning_rate": 1.5466187617530647e-06, + "loss": 1.2479, + "step": 4173 + }, + { + "epoch": 0.88, + "learning_rate": 1.541368759886397e-06, + "loss": 1.0979, + "step": 4174 + }, + { + "epoch": 0.88, + "learning_rate": 1.5361273266021392e-06, + "loss": 1.2153, + "step": 4175 + }, + { + "epoch": 0.88, + "learning_rate": 1.5308944643333857e-06, + "loss": 1.2625, + "step": 4176 + }, + { + "epoch": 0.88, + "learning_rate": 1.5256701755092574e-06, + "loss": 1.2282, + "step": 4177 + }, + { + "epoch": 0.88, + "learning_rate": 1.5204544625548922e-06, + "loss": 1.2628, + "step": 4178 + }, + { + "epoch": 0.88, + "learning_rate": 1.5152473278914447e-06, + "loss": 1.2377, + "step": 4179 + }, + { + "epoch": 0.88, + "learning_rate": 1.5100487739360993e-06, + "loss": 1.2581, + "step": 4180 + }, + { + "epoch": 0.88, + "learning_rate": 1.5048588031020405e-06, + "loss": 1.228, + "step": 4181 + }, + { + "epoch": 0.88, + "learning_rate": 1.4996774177984818e-06, + "loss": 1.2054, + "step": 4182 + }, + { + "epoch": 0.88, + "learning_rate": 1.494504620430648e-06, + "loss": 1.25, + "step": 4183 + }, + { + "epoch": 0.88, + "learning_rate": 1.489340413399769e-06, + "loss": 1.2274, + "step": 4184 + }, + { + "epoch": 0.88, + "learning_rate": 1.484184799103101e-06, + "loss": 1.2219, + "step": 4185 + }, + { + "epoch": 0.88, + "learning_rate": 1.4790377799339007e-06, + "loss": 1.2518, + "step": 4186 + }, + { + "epoch": 0.88, + "learning_rate": 1.4738993582814343e-06, + "loss": 1.2306, + "step": 4187 + }, + { + "epoch": 0.88, + "learning_rate": 1.4687695365309895e-06, + "loss": 1.2787, + "step": 4188 + }, + { + "epoch": 0.88, + "learning_rate": 1.4636483170638505e-06, + "loss": 1.2192, + "step": 4189 + }, + { + "epoch": 0.88, + "learning_rate": 1.4585357022573043e-06, + "loss": 1.2671, + "step": 4190 + }, + { + "epoch": 0.88, + "learning_rate": 1.4534316944846595e-06, + "loss": 1.2599, + "step": 4191 + }, + { + "epoch": 0.88, + "learning_rate": 1.4483362961152114e-06, + "loss": 1.1853, + "step": 4192 + }, + { + "epoch": 0.88, + "learning_rate": 1.4432495095142796e-06, + "loss": 1.2101, + "step": 4193 + }, + { + "epoch": 0.88, + "learning_rate": 1.438171337043164e-06, + "loss": 1.2596, + "step": 4194 + }, + { + "epoch": 0.88, + "learning_rate": 1.4331017810591764e-06, + "loss": 1.2582, + "step": 4195 + }, + { + "epoch": 0.88, + "learning_rate": 1.4280408439156369e-06, + "loss": 1.2308, + "step": 4196 + }, + { + "epoch": 0.88, + "learning_rate": 1.4229885279618461e-06, + "loss": 1.2049, + "step": 4197 + }, + { + "epoch": 0.88, + "learning_rate": 1.4179448355431168e-06, + "loss": 1.2557, + "step": 4198 + }, + { + "epoch": 0.88, + "learning_rate": 1.4129097690007543e-06, + "loss": 1.2506, + "step": 4199 + }, + { + "epoch": 0.88, + "learning_rate": 1.4078833306720573e-06, + "loss": 1.216, + "step": 4200 + }, + { + "epoch": 0.88, + "learning_rate": 1.4028655228903286e-06, + "loss": 1.2186, + "step": 4201 + }, + { + "epoch": 0.88, + "learning_rate": 1.3978563479848538e-06, + "loss": 1.2294, + "step": 4202 + }, + { + "epoch": 0.88, + "learning_rate": 1.3928558082809107e-06, + "loss": 1.2112, + "step": 4203 + }, + { + "epoch": 0.88, + "learning_rate": 1.3878639060997822e-06, + "loss": 1.2523, + "step": 4204 + }, + { + "epoch": 0.88, + "learning_rate": 1.3828806437587216e-06, + "loss": 1.3306, + "step": 4205 + }, + { + "epoch": 0.88, + "learning_rate": 1.3779060235709918e-06, + "loss": 1.2433, + "step": 4206 + }, + { + "epoch": 0.88, + "learning_rate": 1.3729400478458322e-06, + "loss": 1.1959, + "step": 4207 + }, + { + "epoch": 0.89, + "learning_rate": 1.3679827188884675e-06, + "loss": 1.2551, + "step": 4208 + }, + { + "epoch": 0.89, + "learning_rate": 1.3630340390001195e-06, + "loss": 1.1318, + "step": 4209 + }, + { + "epoch": 0.89, + "learning_rate": 1.358094010477986e-06, + "loss": 1.2404, + "step": 4210 + }, + { + "epoch": 0.89, + "learning_rate": 1.353162635615246e-06, + "loss": 1.2242, + "step": 4211 + }, + { + "epoch": 0.89, + "learning_rate": 1.3482399167010752e-06, + "loss": 1.2531, + "step": 4212 + }, + { + "epoch": 0.89, + "learning_rate": 1.3433258560206165e-06, + "loss": 1.2312, + "step": 4213 + }, + { + "epoch": 0.89, + "learning_rate": 1.3384204558550028e-06, + "loss": 1.2286, + "step": 4214 + }, + { + "epoch": 0.89, + "learning_rate": 1.333523718481342e-06, + "loss": 1.1752, + "step": 4215 + }, + { + "epoch": 0.89, + "learning_rate": 1.3286356461727202e-06, + "loss": 1.1919, + "step": 4216 + }, + { + "epoch": 0.89, + "learning_rate": 1.3237562411982086e-06, + "loss": 1.2456, + "step": 4217 + }, + { + "epoch": 0.89, + "learning_rate": 1.3188855058228468e-06, + "loss": 1.2068, + "step": 4218 + }, + { + "epoch": 0.89, + "learning_rate": 1.3140234423076504e-06, + "loss": 1.2077, + "step": 4219 + }, + { + "epoch": 0.89, + "learning_rate": 1.3091700529096186e-06, + "loss": 1.2678, + "step": 4220 + }, + { + "epoch": 0.89, + "learning_rate": 1.304325339881709e-06, + "loss": 1.2467, + "step": 4221 + }, + { + "epoch": 0.89, + "learning_rate": 1.2994893054728653e-06, + "loss": 1.2708, + "step": 4222 + }, + { + "epoch": 0.89, + "learning_rate": 1.2946619519279979e-06, + "loss": 1.2972, + "step": 4223 + }, + { + "epoch": 0.89, + "learning_rate": 1.2898432814879813e-06, + "loss": 1.2105, + "step": 4224 + }, + { + "epoch": 0.89, + "learning_rate": 1.2850332963896706e-06, + "loss": 1.2809, + "step": 4225 + }, + { + "epoch": 0.89, + "learning_rate": 1.2802319988658818e-06, + "loss": 1.2574, + "step": 4226 + }, + { + "epoch": 0.89, + "learning_rate": 1.2754393911453944e-06, + "loss": 1.2115, + "step": 4227 + }, + { + "epoch": 0.89, + "learning_rate": 1.2706554754529665e-06, + "loss": 1.2213, + "step": 4228 + }, + { + "epoch": 0.89, + "learning_rate": 1.2658802540093084e-06, + "loss": 1.2547, + "step": 4229 + }, + { + "epoch": 0.89, + "learning_rate": 1.2611137290311003e-06, + "loss": 1.2278, + "step": 4230 + }, + { + "epoch": 0.89, + "learning_rate": 1.2563559027309925e-06, + "loss": 1.1846, + "step": 4231 + }, + { + "epoch": 0.89, + "learning_rate": 1.2516067773175822e-06, + "loss": 1.2716, + "step": 4232 + }, + { + "epoch": 0.89, + "learning_rate": 1.2468663549954397e-06, + "loss": 1.2317, + "step": 4233 + }, + { + "epoch": 0.89, + "learning_rate": 1.2421346379650868e-06, + "loss": 1.2159, + "step": 4234 + }, + { + "epoch": 0.89, + "learning_rate": 1.2374116284230153e-06, + "loss": 1.2525, + "step": 4235 + }, + { + "epoch": 0.89, + "learning_rate": 1.2326973285616628e-06, + "loss": 1.2559, + "step": 4236 + }, + { + "epoch": 0.89, + "learning_rate": 1.2279917405694298e-06, + "loss": 1.239, + "step": 4237 + }, + { + "epoch": 0.89, + "learning_rate": 1.2232948666306732e-06, + "loss": 1.2, + "step": 4238 + }, + { + "epoch": 0.89, + "learning_rate": 1.218606708925707e-06, + "loss": 1.2679, + "step": 4239 + }, + { + "epoch": 0.89, + "learning_rate": 1.2139272696307857e-06, + "loss": 1.2491, + "step": 4240 + }, + { + "epoch": 0.89, + "learning_rate": 1.2092565509181386e-06, + "loss": 1.2268, + "step": 4241 + }, + { + "epoch": 0.89, + "learning_rate": 1.2045945549559269e-06, + "loss": 1.2602, + "step": 4242 + }, + { + "epoch": 0.89, + "learning_rate": 1.1999412839082748e-06, + "loss": 1.1739, + "step": 4243 + }, + { + "epoch": 0.89, + "learning_rate": 1.1952967399352522e-06, + "loss": 1.232, + "step": 4244 + }, + { + "epoch": 0.89, + "learning_rate": 1.1906609251928746e-06, + "loss": 1.2015, + "step": 4245 + }, + { + "epoch": 0.89, + "learning_rate": 1.186033841833112e-06, + "loss": 1.2532, + "step": 4246 + }, + { + "epoch": 0.89, + "learning_rate": 1.1814154920038789e-06, + "loss": 1.2721, + "step": 4247 + }, + { + "epoch": 0.89, + "learning_rate": 1.17680587784903e-06, + "loss": 1.1803, + "step": 4248 + }, + { + "epoch": 0.89, + "learning_rate": 1.1722050015083752e-06, + "loss": 1.2556, + "step": 4249 + }, + { + "epoch": 0.89, + "learning_rate": 1.1676128651176578e-06, + "loss": 1.1844, + "step": 4250 + }, + { + "epoch": 0.89, + "learning_rate": 1.163029470808572e-06, + "loss": 1.2093, + "step": 4251 + }, + { + "epoch": 0.89, + "learning_rate": 1.1584548207087498e-06, + "loss": 1.2231, + "step": 4252 + }, + { + "epoch": 0.89, + "learning_rate": 1.1538889169417654e-06, + "loss": 1.2151, + "step": 4253 + }, + { + "epoch": 0.89, + "learning_rate": 1.1493317616271327e-06, + "loss": 1.2287, + "step": 4254 + }, + { + "epoch": 0.89, + "learning_rate": 1.1447833568803036e-06, + "loss": 1.2537, + "step": 4255 + }, + { + "epoch": 0.9, + "learning_rate": 1.140243704812667e-06, + "loss": 1.1861, + "step": 4256 + }, + { + "epoch": 0.9, + "learning_rate": 1.1357128075315572e-06, + "loss": 1.221, + "step": 4257 + }, + { + "epoch": 0.9, + "learning_rate": 1.1311906671402274e-06, + "loss": 1.2518, + "step": 4258 + }, + { + "epoch": 0.9, + "learning_rate": 1.126677285737887e-06, + "loss": 1.2049, + "step": 4259 + }, + { + "epoch": 0.9, + "learning_rate": 1.122172665419663e-06, + "loss": 1.2403, + "step": 4260 + }, + { + "epoch": 0.9, + "learning_rate": 1.1176768082766177e-06, + "loss": 1.1923, + "step": 4261 + }, + { + "epoch": 0.9, + "learning_rate": 1.1131897163957573e-06, + "loss": 1.1651, + "step": 4262 + }, + { + "epoch": 0.9, + "learning_rate": 1.1087113918600023e-06, + "loss": 1.1937, + "step": 4263 + }, + { + "epoch": 0.9, + "learning_rate": 1.1042418367482188e-06, + "loss": 1.3132, + "step": 4264 + }, + { + "epoch": 0.9, + "learning_rate": 1.0997810531351916e-06, + "loss": 1.2421, + "step": 4265 + }, + { + "epoch": 0.9, + "learning_rate": 1.0953290430916353e-06, + "loss": 1.2078, + "step": 4266 + }, + { + "epoch": 0.9, + "learning_rate": 1.0908858086841989e-06, + "loss": 1.2188, + "step": 4267 + }, + { + "epoch": 0.9, + "learning_rate": 1.0864513519754484e-06, + "loss": 1.2179, + "step": 4268 + }, + { + "epoch": 0.9, + "learning_rate": 1.082025675023879e-06, + "loss": 1.2269, + "step": 4269 + }, + { + "epoch": 0.9, + "learning_rate": 1.077608779883912e-06, + "loss": 1.2637, + "step": 4270 + }, + { + "epoch": 0.9, + "learning_rate": 1.0732006686058893e-06, + "loss": 1.241, + "step": 4271 + }, + { + "epoch": 0.9, + "learning_rate": 1.0688013432360811e-06, + "loss": 1.2497, + "step": 4272 + }, + { + "epoch": 0.9, + "learning_rate": 1.0644108058166692e-06, + "loss": 1.2513, + "step": 4273 + }, + { + "epoch": 0.9, + "learning_rate": 1.0600290583857631e-06, + "loss": 1.2318, + "step": 4274 + }, + { + "epoch": 0.9, + "learning_rate": 1.0556561029773914e-06, + "loss": 1.1844, + "step": 4275 + }, + { + "epoch": 0.9, + "learning_rate": 1.0512919416214995e-06, + "loss": 1.2313, + "step": 4276 + }, + { + "epoch": 0.9, + "learning_rate": 1.0469365763439532e-06, + "loss": 1.207, + "step": 4277 + }, + { + "epoch": 0.9, + "learning_rate": 1.0425900091665286e-06, + "loss": 1.291, + "step": 4278 + }, + { + "epoch": 0.9, + "learning_rate": 1.0382522421069274e-06, + "loss": 1.2355, + "step": 4279 + }, + { + "epoch": 0.9, + "learning_rate": 1.033923277178759e-06, + "loss": 1.1848, + "step": 4280 + }, + { + "epoch": 0.9, + "learning_rate": 1.029603116391551e-06, + "loss": 1.2407, + "step": 4281 + }, + { + "epoch": 0.9, + "learning_rate": 1.0252917617507374e-06, + "loss": 1.2639, + "step": 4282 + }, + { + "epoch": 0.9, + "learning_rate": 1.020989215257675e-06, + "loss": 1.2355, + "step": 4283 + }, + { + "epoch": 0.9, + "learning_rate": 1.0166954789096194e-06, + "loss": 1.1912, + "step": 4284 + }, + { + "epoch": 0.9, + "learning_rate": 1.0124105546997521e-06, + "loss": 1.2359, + "step": 4285 + }, + { + "epoch": 0.9, + "learning_rate": 1.008134444617146e-06, + "loss": 1.2382, + "step": 4286 + }, + { + "epoch": 0.9, + "learning_rate": 1.0038671506467934e-06, + "loss": 1.2216, + "step": 4287 + }, + { + "epoch": 0.9, + "learning_rate": 9.996086747695966e-07, + "loss": 1.2366, + "step": 4288 + }, + { + "epoch": 0.9, + "learning_rate": 9.953590189623563e-07, + "loss": 1.2396, + "step": 4289 + }, + { + "epoch": 0.9, + "learning_rate": 9.911181851977792e-07, + "loss": 1.202, + "step": 4290 + }, + { + "epoch": 0.9, + "learning_rate": 9.868861754444858e-07, + "loss": 1.2017, + "step": 4291 + }, + { + "epoch": 0.9, + "learning_rate": 9.826629916669917e-07, + "loss": 1.2344, + "step": 4292 + }, + { + "epoch": 0.9, + "learning_rate": 9.784486358257194e-07, + "loss": 1.2402, + "step": 4293 + }, + { + "epoch": 0.9, + "learning_rate": 9.742431098769933e-07, + "loss": 1.2851, + "step": 4294 + }, + { + "epoch": 0.9, + "learning_rate": 9.700464157730338e-07, + "loss": 1.1857, + "step": 4295 + }, + { + "epoch": 0.9, + "learning_rate": 9.658585554619737e-07, + "loss": 1.2451, + "step": 4296 + }, + { + "epoch": 0.9, + "learning_rate": 9.616795308878313e-07, + "loss": 1.2263, + "step": 4297 + }, + { + "epoch": 0.9, + "learning_rate": 9.575093439905259e-07, + "loss": 1.236, + "step": 4298 + }, + { + "epoch": 0.9, + "learning_rate": 9.533479967058867e-07, + "loss": 1.2494, + "step": 4299 + }, + { + "epoch": 0.9, + "learning_rate": 9.491954909656242e-07, + "loss": 1.1863, + "step": 4300 + }, + { + "epoch": 0.9, + "learning_rate": 9.450518286973542e-07, + "loss": 1.2832, + "step": 4301 + }, + { + "epoch": 0.9, + "learning_rate": 9.409170118245803e-07, + "loss": 1.2413, + "step": 4302 + }, + { + "epoch": 0.91, + "learning_rate": 9.36791042266707e-07, + "loss": 1.2495, + "step": 4303 + }, + { + "epoch": 0.91, + "learning_rate": 9.326739219390246e-07, + "loss": 1.224, + "step": 4304 + }, + { + "epoch": 0.91, + "learning_rate": 9.285656527527264e-07, + "loss": 1.23, + "step": 4305 + }, + { + "epoch": 0.91, + "learning_rate": 9.244662366148826e-07, + "loss": 1.2561, + "step": 4306 + }, + { + "epoch": 0.91, + "learning_rate": 9.203756754284665e-07, + "loss": 1.2677, + "step": 4307 + }, + { + "epoch": 0.91, + "learning_rate": 9.162939710923324e-07, + "loss": 1.2469, + "step": 4308 + }, + { + "epoch": 0.91, + "learning_rate": 9.122211255012292e-07, + "loss": 1.1916, + "step": 4309 + }, + { + "epoch": 0.91, + "learning_rate": 9.081571405457912e-07, + "loss": 1.2088, + "step": 4310 + }, + { + "epoch": 0.91, + "learning_rate": 9.041020181125315e-07, + "loss": 1.2659, + "step": 4311 + }, + { + "epoch": 0.91, + "learning_rate": 9.000557600838666e-07, + "loss": 1.26, + "step": 4312 + }, + { + "epoch": 0.91, + "learning_rate": 8.960183683380807e-07, + "loss": 1.2366, + "step": 4313 + }, + { + "epoch": 0.91, + "learning_rate": 8.919898447493569e-07, + "loss": 1.2412, + "step": 4314 + }, + { + "epoch": 0.91, + "learning_rate": 8.879701911877503e-07, + "loss": 1.2653, + "step": 4315 + }, + { + "epoch": 0.91, + "learning_rate": 8.839594095191995e-07, + "loss": 1.1935, + "step": 4316 + }, + { + "epoch": 0.91, + "learning_rate": 8.799575016055373e-07, + "loss": 1.2208, + "step": 4317 + }, + { + "epoch": 0.91, + "learning_rate": 8.7596446930446e-07, + "loss": 1.2009, + "step": 4318 + }, + { + "epoch": 0.91, + "learning_rate": 8.719803144695516e-07, + "loss": 1.2297, + "step": 4319 + }, + { + "epoch": 0.91, + "learning_rate": 8.680050389502814e-07, + "loss": 1.258, + "step": 4320 + }, + { + "epoch": 0.91, + "learning_rate": 8.640386445919847e-07, + "loss": 1.2318, + "step": 4321 + }, + { + "epoch": 0.91, + "learning_rate": 8.600811332358861e-07, + "loss": 1.2014, + "step": 4322 + }, + { + "epoch": 0.91, + "learning_rate": 8.561325067190762e-07, + "loss": 1.2076, + "step": 4323 + }, + { + "epoch": 0.91, + "learning_rate": 8.521927668745244e-07, + "loss": 1.1566, + "step": 4324 + }, + { + "epoch": 0.91, + "learning_rate": 8.482619155310812e-07, + "loss": 1.2294, + "step": 4325 + }, + { + "epoch": 0.91, + "learning_rate": 8.443399545134623e-07, + "loss": 1.1682, + "step": 4326 + }, + { + "epoch": 0.91, + "learning_rate": 8.404268856422626e-07, + "loss": 1.2093, + "step": 4327 + }, + { + "epoch": 0.91, + "learning_rate": 8.365227107339447e-07, + "loss": 1.1701, + "step": 4328 + }, + { + "epoch": 0.91, + "learning_rate": 8.326274316008475e-07, + "loss": 1.2414, + "step": 4329 + }, + { + "epoch": 0.91, + "learning_rate": 8.287410500511739e-07, + "loss": 1.1983, + "step": 4330 + }, + { + "epoch": 0.91, + "learning_rate": 8.248635678890049e-07, + "loss": 1.214, + "step": 4331 + }, + { + "epoch": 0.91, + "learning_rate": 8.209949869142808e-07, + "loss": 1.2238, + "step": 4332 + }, + { + "epoch": 0.91, + "learning_rate": 8.171353089228206e-07, + "loss": 1.2027, + "step": 4333 + }, + { + "epoch": 0.91, + "learning_rate": 8.132845357062979e-07, + "loss": 1.2203, + "step": 4334 + }, + { + "epoch": 0.91, + "learning_rate": 8.094426690522672e-07, + "loss": 1.2504, + "step": 4335 + }, + { + "epoch": 0.91, + "learning_rate": 8.056097107441352e-07, + "loss": 1.2676, + "step": 4336 + }, + { + "epoch": 0.91, + "learning_rate": 8.017856625611809e-07, + "loss": 1.2207, + "step": 4337 + }, + { + "epoch": 0.91, + "learning_rate": 7.979705262785442e-07, + "loss": 1.2061, + "step": 4338 + }, + { + "epoch": 0.91, + "learning_rate": 7.941643036672309e-07, + "loss": 1.2271, + "step": 4339 + }, + { + "epoch": 0.91, + "learning_rate": 7.903669964941052e-07, + "loss": 1.2185, + "step": 4340 + }, + { + "epoch": 0.91, + "learning_rate": 7.865786065218973e-07, + "loss": 1.2584, + "step": 4341 + }, + { + "epoch": 0.91, + "learning_rate": 7.827991355091891e-07, + "loss": 1.2262, + "step": 4342 + }, + { + "epoch": 0.91, + "learning_rate": 7.790285852104373e-07, + "loss": 1.2416, + "step": 4343 + }, + { + "epoch": 0.91, + "learning_rate": 7.752669573759464e-07, + "loss": 1.2643, + "step": 4344 + }, + { + "epoch": 0.91, + "learning_rate": 7.715142537518771e-07, + "loss": 1.1716, + "step": 4345 + }, + { + "epoch": 0.91, + "learning_rate": 7.677704760802562e-07, + "loss": 1.2065, + "step": 4346 + }, + { + "epoch": 0.91, + "learning_rate": 7.640356260989601e-07, + "loss": 1.2274, + "step": 4347 + }, + { + "epoch": 0.91, + "learning_rate": 7.603097055417242e-07, + "loss": 1.24, + "step": 4348 + }, + { + "epoch": 0.91, + "learning_rate": 7.565927161381403e-07, + "loss": 1.2557, + "step": 4349 + }, + { + "epoch": 0.91, + "learning_rate": 7.528846596136485e-07, + "loss": 1.2253, + "step": 4350 + }, + { + "epoch": 0.92, + "learning_rate": 7.491855376895519e-07, + "loss": 1.2702, + "step": 4351 + }, + { + "epoch": 0.92, + "learning_rate": 7.454953520829899e-07, + "loss": 1.2337, + "step": 4352 + }, + { + "epoch": 0.92, + "learning_rate": 7.418141045069727e-07, + "loss": 1.2667, + "step": 4353 + }, + { + "epoch": 0.92, + "learning_rate": 7.381417966703508e-07, + "loss": 1.243, + "step": 4354 + }, + { + "epoch": 0.92, + "learning_rate": 7.344784302778274e-07, + "loss": 1.2792, + "step": 4355 + }, + { + "epoch": 0.92, + "learning_rate": 7.308240070299489e-07, + "loss": 1.206, + "step": 4356 + }, + { + "epoch": 0.92, + "learning_rate": 7.271785286231204e-07, + "loss": 1.2438, + "step": 4357 + }, + { + "epoch": 0.92, + "learning_rate": 7.235419967495883e-07, + "loss": 1.2203, + "step": 4358 + }, + { + "epoch": 0.92, + "learning_rate": 7.199144130974489e-07, + "loss": 1.2486, + "step": 4359 + }, + { + "epoch": 0.92, + "learning_rate": 7.16295779350642e-07, + "loss": 1.2434, + "step": 4360 + }, + { + "epoch": 0.92, + "learning_rate": 7.126860971889527e-07, + "loss": 1.2417, + "step": 4361 + }, + { + "epoch": 0.92, + "learning_rate": 7.090853682880161e-07, + "loss": 1.2403, + "step": 4362 + }, + { + "epoch": 0.92, + "learning_rate": 7.054935943193042e-07, + "loss": 1.2814, + "step": 4363 + }, + { + "epoch": 0.92, + "learning_rate": 7.019107769501366e-07, + "loss": 1.2591, + "step": 4364 + }, + { + "epoch": 0.92, + "learning_rate": 6.983369178436739e-07, + "loss": 1.3018, + "step": 4365 + }, + { + "epoch": 0.92, + "learning_rate": 6.947720186589158e-07, + "loss": 1.2351, + "step": 4366 + }, + { + "epoch": 0.92, + "learning_rate": 6.912160810507096e-07, + "loss": 1.2228, + "step": 4367 + }, + { + "epoch": 0.92, + "learning_rate": 6.876691066697349e-07, + "loss": 1.2691, + "step": 4368 + }, + { + "epoch": 0.92, + "learning_rate": 6.841310971625103e-07, + "loss": 1.2701, + "step": 4369 + }, + { + "epoch": 0.92, + "learning_rate": 6.806020541714042e-07, + "loss": 1.2439, + "step": 4370 + }, + { + "epoch": 0.92, + "learning_rate": 6.770819793346084e-07, + "loss": 1.2196, + "step": 4371 + }, + { + "epoch": 0.92, + "learning_rate": 6.735708742861624e-07, + "loss": 1.2263, + "step": 4372 + }, + { + "epoch": 0.92, + "learning_rate": 6.700687406559359e-07, + "loss": 1.2102, + "step": 4373 + }, + { + "epoch": 0.92, + "learning_rate": 6.665755800696305e-07, + "loss": 1.2618, + "step": 4374 + }, + { + "epoch": 0.92, + "learning_rate": 6.63091394148796e-07, + "loss": 1.1802, + "step": 4375 + }, + { + "epoch": 0.92, + "learning_rate": 6.596161845108006e-07, + "loss": 1.2205, + "step": 4376 + }, + { + "epoch": 0.92, + "learning_rate": 6.561499527688586e-07, + "loss": 1.1975, + "step": 4377 + }, + { + "epoch": 0.92, + "learning_rate": 6.526927005320072e-07, + "loss": 1.1822, + "step": 4378 + }, + { + "epoch": 0.92, + "learning_rate": 6.492444294051204e-07, + "loss": 1.2436, + "step": 4379 + }, + { + "epoch": 0.92, + "learning_rate": 6.458051409889021e-07, + "loss": 1.2051, + "step": 4380 + }, + { + "epoch": 0.92, + "learning_rate": 6.423748368798843e-07, + "loss": 1.2292, + "step": 4381 + }, + { + "epoch": 0.92, + "learning_rate": 6.38953518670431e-07, + "loss": 1.2116, + "step": 4382 + }, + { + "epoch": 0.92, + "learning_rate": 6.355411879487339e-07, + "loss": 1.2199, + "step": 4383 + }, + { + "epoch": 0.92, + "learning_rate": 6.321378462988148e-07, + "loss": 1.2384, + "step": 4384 + }, + { + "epoch": 0.92, + "learning_rate": 6.287434953005145e-07, + "loss": 1.2618, + "step": 4385 + }, + { + "epoch": 0.92, + "learning_rate": 6.253581365295148e-07, + "loss": 1.2689, + "step": 4386 + }, + { + "epoch": 0.92, + "learning_rate": 6.219817715573073e-07, + "loss": 1.2113, + "step": 4387 + }, + { + "epoch": 0.92, + "learning_rate": 6.18614401951223e-07, + "loss": 1.2616, + "step": 4388 + }, + { + "epoch": 0.92, + "learning_rate": 6.152560292744091e-07, + "loss": 1.2255, + "step": 4389 + }, + { + "epoch": 0.92, + "learning_rate": 6.119066550858321e-07, + "loss": 1.2419, + "step": 4390 + }, + { + "epoch": 0.92, + "learning_rate": 6.085662809402926e-07, + "loss": 1.2623, + "step": 4391 + }, + { + "epoch": 0.92, + "learning_rate": 6.052349083884057e-07, + "loss": 1.2152, + "step": 4392 + }, + { + "epoch": 0.92, + "learning_rate": 6.019125389766123e-07, + "loss": 1.2353, + "step": 4393 + }, + { + "epoch": 0.92, + "learning_rate": 5.985991742471698e-07, + "loss": 1.2339, + "step": 4394 + }, + { + "epoch": 0.92, + "learning_rate": 5.952948157381566e-07, + "loss": 1.1722, + "step": 4395 + }, + { + "epoch": 0.92, + "learning_rate": 5.919994649834748e-07, + "loss": 1.2445, + "step": 4396 + }, + { + "epoch": 0.92, + "learning_rate": 5.887131235128385e-07, + "loss": 1.2186, + "step": 4397 + }, + { + "epoch": 0.93, + "learning_rate": 5.854357928517806e-07, + "loss": 1.2297, + "step": 4398 + }, + { + "epoch": 0.93, + "learning_rate": 5.821674745216599e-07, + "loss": 1.2235, + "step": 4399 + }, + { + "epoch": 0.93, + "learning_rate": 5.789081700396381e-07, + "loss": 1.1534, + "step": 4400 + }, + { + "epoch": 0.93, + "learning_rate": 5.756578809187008e-07, + "loss": 1.2411, + "step": 4401 + }, + { + "epoch": 0.93, + "learning_rate": 5.724166086676542e-07, + "loss": 1.2196, + "step": 4402 + }, + { + "epoch": 0.93, + "learning_rate": 5.691843547911013e-07, + "loss": 1.2477, + "step": 4403 + }, + { + "epoch": 0.93, + "learning_rate": 5.6596112078948e-07, + "loss": 1.216, + "step": 4404 + }, + { + "epoch": 0.93, + "learning_rate": 5.627469081590242e-07, + "loss": 1.2527, + "step": 4405 + }, + { + "epoch": 0.93, + "learning_rate": 5.595417183917851e-07, + "loss": 1.2296, + "step": 4406 + }, + { + "epoch": 0.93, + "learning_rate": 5.563455529756301e-07, + "loss": 1.2075, + "step": 4407 + }, + { + "epoch": 0.93, + "learning_rate": 5.531584133942325e-07, + "loss": 1.2425, + "step": 4408 + }, + { + "epoch": 0.93, + "learning_rate": 5.499803011270776e-07, + "loss": 1.2671, + "step": 4409 + }, + { + "epoch": 0.93, + "learning_rate": 5.468112176494633e-07, + "loss": 1.2547, + "step": 4410 + }, + { + "epoch": 0.93, + "learning_rate": 5.43651164432486e-07, + "loss": 1.2541, + "step": 4411 + }, + { + "epoch": 0.93, + "learning_rate": 5.405001429430634e-07, + "loss": 1.195, + "step": 4412 + }, + { + "epoch": 0.93, + "learning_rate": 5.373581546439077e-07, + "loss": 1.2237, + "step": 4413 + }, + { + "epoch": 0.93, + "learning_rate": 5.342252009935522e-07, + "loss": 1.2363, + "step": 4414 + }, + { + "epoch": 0.93, + "learning_rate": 5.311012834463247e-07, + "loss": 1.2104, + "step": 4415 + }, + { + "epoch": 0.93, + "learning_rate": 5.279864034523586e-07, + "loss": 1.2381, + "step": 4416 + }, + { + "epoch": 0.93, + "learning_rate": 5.248805624576037e-07, + "loss": 1.2374, + "step": 4417 + }, + { + "epoch": 0.93, + "learning_rate": 5.217837619038002e-07, + "loss": 1.2506, + "step": 4418 + }, + { + "epoch": 0.93, + "learning_rate": 5.186960032284983e-07, + "loss": 1.2166, + "step": 4419 + }, + { + "epoch": 0.93, + "learning_rate": 5.156172878650489e-07, + "loss": 1.2131, + "step": 4420 + }, + { + "epoch": 0.93, + "learning_rate": 5.125476172426092e-07, + "loss": 1.2366, + "step": 4421 + }, + { + "epoch": 0.93, + "learning_rate": 5.094869927861323e-07, + "loss": 1.2112, + "step": 4422 + }, + { + "epoch": 0.93, + "learning_rate": 5.064354159163754e-07, + "loss": 1.2071, + "step": 4423 + }, + { + "epoch": 0.93, + "learning_rate": 5.033928880498917e-07, + "loss": 1.3022, + "step": 4424 + }, + { + "epoch": 0.93, + "learning_rate": 5.003594105990384e-07, + "loss": 1.21, + "step": 4425 + }, + { + "epoch": 0.93, + "learning_rate": 4.973349849719733e-07, + "loss": 1.1817, + "step": 4426 + }, + { + "epoch": 0.93, + "learning_rate": 4.943196125726446e-07, + "loss": 1.2437, + "step": 4427 + }, + { + "epoch": 0.93, + "learning_rate": 4.913132948008037e-07, + "loss": 1.1759, + "step": 4428 + }, + { + "epoch": 0.93, + "learning_rate": 4.883160330519965e-07, + "loss": 1.2422, + "step": 4429 + }, + { + "epoch": 0.93, + "learning_rate": 4.853278287175677e-07, + "loss": 1.2204, + "step": 4430 + }, + { + "epoch": 0.93, + "learning_rate": 4.823486831846547e-07, + "loss": 1.2376, + "step": 4431 + }, + { + "epoch": 0.93, + "learning_rate": 4.793785978361887e-07, + "loss": 1.2706, + "step": 4432 + }, + { + "epoch": 0.93, + "learning_rate": 4.764175740509025e-07, + "loss": 1.2328, + "step": 4433 + }, + { + "epoch": 0.93, + "learning_rate": 4.7346561320330997e-07, + "loss": 1.2333, + "step": 4434 + }, + { + "epoch": 0.93, + "learning_rate": 4.7052271666373053e-07, + "loss": 1.1884, + "step": 4435 + }, + { + "epoch": 0.93, + "learning_rate": 4.675888857982669e-07, + "loss": 1.2321, + "step": 4436 + }, + { + "epoch": 0.93, + "learning_rate": 4.646641219688186e-07, + "loss": 1.2524, + "step": 4437 + }, + { + "epoch": 0.93, + "learning_rate": 4.617484265330752e-07, + "loss": 1.2138, + "step": 4438 + }, + { + "epoch": 0.93, + "learning_rate": 4.588418008445161e-07, + "loss": 1.2546, + "step": 4439 + }, + { + "epoch": 0.93, + "learning_rate": 4.5594424625240887e-07, + "loss": 1.2357, + "step": 4440 + }, + { + "epoch": 0.93, + "learning_rate": 4.5305576410181293e-07, + "loss": 1.2278, + "step": 4441 + }, + { + "epoch": 0.93, + "learning_rate": 4.5017635573357366e-07, + "loss": 1.2714, + "step": 4442 + }, + { + "epoch": 0.93, + "learning_rate": 4.4730602248432843e-07, + "loss": 1.2129, + "step": 4443 + }, + { + "epoch": 0.93, + "learning_rate": 4.444447656864981e-07, + "loss": 1.2472, + "step": 4444 + }, + { + "epoch": 0.93, + "learning_rate": 4.4159258666828907e-07, + "loss": 1.2369, + "step": 4445 + }, + { + "epoch": 0.94, + "learning_rate": 4.3874948675370233e-07, + "loss": 1.2305, + "step": 4446 + }, + { + "epoch": 0.94, + "learning_rate": 4.3591546726250877e-07, + "loss": 1.234, + "step": 4447 + }, + { + "epoch": 0.94, + "learning_rate": 4.3309052951028275e-07, + "loss": 1.2171, + "step": 4448 + }, + { + "epoch": 0.94, + "learning_rate": 4.302746748083664e-07, + "loss": 1.1726, + "step": 4449 + }, + { + "epoch": 0.94, + "learning_rate": 4.2746790446389853e-07, + "loss": 1.2404, + "step": 4450 + }, + { + "epoch": 0.94, + "learning_rate": 4.2467021977978806e-07, + "loss": 1.1928, + "step": 4451 + }, + { + "epoch": 0.94, + "learning_rate": 4.218816220547406e-07, + "loss": 1.2398, + "step": 4452 + }, + { + "epoch": 0.94, + "learning_rate": 4.1910211258322954e-07, + "loss": 1.2147, + "step": 4453 + }, + { + "epoch": 0.94, + "learning_rate": 4.1633169265552274e-07, + "loss": 1.2187, + "step": 4454 + }, + { + "epoch": 0.94, + "learning_rate": 4.13570363557656e-07, + "loss": 1.2409, + "step": 4455 + }, + { + "epoch": 0.94, + "learning_rate": 4.108181265714528e-07, + "loss": 1.2315, + "step": 4456 + }, + { + "epoch": 0.94, + "learning_rate": 4.0807498297451786e-07, + "loss": 1.2053, + "step": 4457 + }, + { + "epoch": 0.94, + "learning_rate": 4.053409340402259e-07, + "loss": 1.1859, + "step": 4458 + }, + { + "epoch": 0.94, + "learning_rate": 4.026159810377417e-07, + "loss": 1.1849, + "step": 4459 + }, + { + "epoch": 0.94, + "learning_rate": 3.999001252319934e-07, + "loss": 1.2131, + "step": 4460 + }, + { + "epoch": 0.94, + "learning_rate": 3.971933678836992e-07, + "loss": 1.283, + "step": 4461 + }, + { + "epoch": 0.94, + "learning_rate": 3.944957102493474e-07, + "loss": 1.2054, + "step": 4462 + }, + { + "epoch": 0.94, + "learning_rate": 3.918071535812007e-07, + "loss": 1.2272, + "step": 4463 + }, + { + "epoch": 0.94, + "learning_rate": 3.8912769912730297e-07, + "loss": 1.2537, + "step": 4464 + }, + { + "epoch": 0.94, + "learning_rate": 3.864573481314682e-07, + "loss": 1.2086, + "step": 4465 + }, + { + "epoch": 0.94, + "learning_rate": 3.837961018332825e-07, + "loss": 1.2417, + "step": 4466 + }, + { + "epoch": 0.94, + "learning_rate": 3.811439614681156e-07, + "loss": 1.2355, + "step": 4467 + }, + { + "epoch": 0.94, + "learning_rate": 3.7850092826709817e-07, + "loss": 1.2388, + "step": 4468 + }, + { + "epoch": 0.94, + "learning_rate": 3.758670034571399e-07, + "loss": 1.222, + "step": 4469 + }, + { + "epoch": 0.94, + "learning_rate": 3.7324218826092053e-07, + "loss": 1.1949, + "step": 4470 + }, + { + "epoch": 0.94, + "learning_rate": 3.7062648389689204e-07, + "loss": 1.2616, + "step": 4471 + }, + { + "epoch": 0.94, + "learning_rate": 3.680198915792765e-07, + "loss": 1.2673, + "step": 4472 + }, + { + "epoch": 0.94, + "learning_rate": 3.654224125180661e-07, + "loss": 1.1847, + "step": 4473 + }, + { + "epoch": 0.94, + "learning_rate": 3.628340479190229e-07, + "loss": 1.253, + "step": 4474 + }, + { + "epoch": 0.94, + "learning_rate": 3.602547989836769e-07, + "loss": 1.2759, + "step": 4475 + }, + { + "epoch": 0.94, + "learning_rate": 3.5768466690933036e-07, + "loss": 1.2087, + "step": 4476 + }, + { + "epoch": 0.94, + "learning_rate": 3.551236528890445e-07, + "loss": 1.2671, + "step": 4477 + }, + { + "epoch": 0.94, + "learning_rate": 3.5257175811166166e-07, + "loss": 1.2362, + "step": 4478 + }, + { + "epoch": 0.94, + "learning_rate": 3.500289837617765e-07, + "loss": 1.2305, + "step": 4479 + }, + { + "epoch": 0.94, + "learning_rate": 3.474953310197604e-07, + "loss": 1.2287, + "step": 4480 + }, + { + "epoch": 0.94, + "learning_rate": 3.4497080106174806e-07, + "loss": 1.2224, + "step": 4481 + }, + { + "epoch": 0.94, + "learning_rate": 3.424553950596332e-07, + "loss": 1.2744, + "step": 4482 + }, + { + "epoch": 0.94, + "learning_rate": 3.3994911418108176e-07, + "loss": 1.2061, + "step": 4483 + }, + { + "epoch": 0.94, + "learning_rate": 3.374519595895209e-07, + "loss": 1.2135, + "step": 4484 + }, + { + "epoch": 0.94, + "learning_rate": 3.3496393244414114e-07, + "loss": 1.2378, + "step": 4485 + }, + { + "epoch": 0.94, + "learning_rate": 3.324850338998964e-07, + "loss": 1.244, + "step": 4486 + }, + { + "epoch": 0.94, + "learning_rate": 3.300152651075039e-07, + "loss": 1.2142, + "step": 4487 + }, + { + "epoch": 0.94, + "learning_rate": 3.2755462721344e-07, + "loss": 1.2057, + "step": 4488 + }, + { + "epoch": 0.94, + "learning_rate": 3.251031213599465e-07, + "loss": 1.2561, + "step": 4489 + }, + { + "epoch": 0.94, + "learning_rate": 3.2266074868501976e-07, + "loss": 1.2426, + "step": 4490 + }, + { + "epoch": 0.94, + "learning_rate": 3.2022751032242396e-07, + "loss": 1.2202, + "step": 4491 + }, + { + "epoch": 0.94, + "learning_rate": 3.178034074016778e-07, + "loss": 1.2431, + "step": 4492 + }, + { + "epoch": 0.95, + "learning_rate": 3.1538844104806343e-07, + "loss": 1.2423, + "step": 4493 + }, + { + "epoch": 0.95, + "learning_rate": 3.1298261238261964e-07, + "loss": 1.2351, + "step": 4494 + }, + { + "epoch": 0.95, + "learning_rate": 3.105859225221397e-07, + "loss": 1.2049, + "step": 4495 + }, + { + "epoch": 0.95, + "learning_rate": 3.0819837257918037e-07, + "loss": 1.2546, + "step": 4496 + }, + { + "epoch": 0.95, + "learning_rate": 3.058199636620529e-07, + "loss": 1.1829, + "step": 4497 + }, + { + "epoch": 0.95, + "learning_rate": 3.034506968748274e-07, + "loss": 1.2594, + "step": 4498 + }, + { + "epoch": 0.95, + "learning_rate": 3.010905733173264e-07, + "loss": 1.2462, + "step": 4499 + }, + { + "epoch": 0.95, + "learning_rate": 2.987395940851312e-07, + "loss": 1.239, + "step": 4500 + }, + { + "epoch": 0.95, + "learning_rate": 2.9639776026957777e-07, + "loss": 1.2714, + "step": 4501 + }, + { + "epoch": 0.95, + "learning_rate": 2.9406507295775657e-07, + "loss": 1.2536, + "step": 4502 + }, + { + "epoch": 0.95, + "learning_rate": 2.91741533232508e-07, + "loss": 1.2272, + "step": 4503 + }, + { + "epoch": 0.95, + "learning_rate": 2.894271421724359e-07, + "loss": 1.2041, + "step": 4504 + }, + { + "epoch": 0.95, + "learning_rate": 2.871219008518877e-07, + "loss": 1.2234, + "step": 4505 + }, + { + "epoch": 0.95, + "learning_rate": 2.8482581034096733e-07, + "loss": 1.2461, + "step": 4506 + }, + { + "epoch": 0.95, + "learning_rate": 2.825388717055311e-07, + "loss": 1.2007, + "step": 4507 + }, + { + "epoch": 0.95, + "learning_rate": 2.8026108600718746e-07, + "loss": 1.2223, + "step": 4508 + }, + { + "epoch": 0.95, + "learning_rate": 2.7799245430329526e-07, + "loss": 1.2345, + "step": 4509 + }, + { + "epoch": 0.95, + "learning_rate": 2.7573297764696085e-07, + "loss": 1.2377, + "step": 4510 + }, + { + "epoch": 0.95, + "learning_rate": 2.7348265708704745e-07, + "loss": 1.1932, + "step": 4511 + }, + { + "epoch": 0.95, + "learning_rate": 2.7124149366816177e-07, + "loss": 1.2411, + "step": 4512 + }, + { + "epoch": 0.95, + "learning_rate": 2.690094884306649e-07, + "loss": 1.2273, + "step": 4513 + }, + { + "epoch": 0.95, + "learning_rate": 2.667866424106591e-07, + "loss": 1.2573, + "step": 4514 + }, + { + "epoch": 0.95, + "learning_rate": 2.6457295664000573e-07, + "loss": 1.2285, + "step": 4515 + }, + { + "epoch": 0.95, + "learning_rate": 2.623684321463049e-07, + "loss": 1.2345, + "step": 4516 + }, + { + "epoch": 0.95, + "learning_rate": 2.6017306995290926e-07, + "loss": 1.2686, + "step": 4517 + }, + { + "epoch": 0.95, + "learning_rate": 2.579868710789124e-07, + "loss": 1.2229, + "step": 4518 + }, + { + "epoch": 0.95, + "learning_rate": 2.5580983653916035e-07, + "loss": 1.2112, + "step": 4519 + }, + { + "epoch": 0.95, + "learning_rate": 2.5364196734424475e-07, + "loss": 1.2086, + "step": 4520 + }, + { + "epoch": 0.95, + "learning_rate": 2.514832645004939e-07, + "loss": 1.2545, + "step": 4521 + }, + { + "epoch": 0.95, + "learning_rate": 2.493337290099973e-07, + "loss": 1.2147, + "step": 4522 + }, + { + "epoch": 0.95, + "learning_rate": 2.471933618705702e-07, + "loss": 1.2331, + "step": 4523 + }, + { + "epoch": 0.95, + "learning_rate": 2.4506216407578665e-07, + "loss": 1.2037, + "step": 4524 + }, + { + "epoch": 0.95, + "learning_rate": 2.429401366149553e-07, + "loss": 1.2079, + "step": 4525 + }, + { + "epoch": 0.95, + "learning_rate": 2.4082728047313487e-07, + "loss": 1.2442, + "step": 4526 + }, + { + "epoch": 0.95, + "learning_rate": 2.3872359663111856e-07, + "loss": 1.2323, + "step": 4527 + }, + { + "epoch": 0.95, + "learning_rate": 2.3662908606544964e-07, + "loss": 1.2278, + "step": 4528 + }, + { + "epoch": 0.95, + "learning_rate": 2.34543749748406e-07, + "loss": 1.279, + "step": 4529 + }, + { + "epoch": 0.95, + "learning_rate": 2.3246758864801544e-07, + "loss": 1.2132, + "step": 4530 + }, + { + "epoch": 0.95, + "learning_rate": 2.304006037280404e-07, + "loss": 1.2389, + "step": 4531 + }, + { + "epoch": 0.95, + "learning_rate": 2.2834279594798002e-07, + "loss": 1.2062, + "step": 4532 + }, + { + "epoch": 0.95, + "learning_rate": 2.2629416626308353e-07, + "loss": 1.1694, + "step": 4533 + }, + { + "epoch": 0.95, + "learning_rate": 2.2425471562433466e-07, + "loss": 1.2011, + "step": 4534 + }, + { + "epoch": 0.95, + "learning_rate": 2.222244449784494e-07, + "loss": 1.1841, + "step": 4535 + }, + { + "epoch": 0.95, + "learning_rate": 2.2020335526789616e-07, + "loss": 1.2362, + "step": 4536 + }, + { + "epoch": 0.95, + "learning_rate": 2.1819144743086883e-07, + "loss": 1.2204, + "step": 4537 + }, + { + "epoch": 0.95, + "learning_rate": 2.1618872240130928e-07, + "loss": 1.2544, + "step": 4538 + }, + { + "epoch": 0.95, + "learning_rate": 2.1419518110888938e-07, + "loss": 1.2338, + "step": 4539 + }, + { + "epoch": 0.95, + "learning_rate": 2.1221082447901774e-07, + "loss": 1.2449, + "step": 4540 + }, + { + "epoch": 0.96, + "learning_rate": 2.1023565343284425e-07, + "loss": 1.2354, + "step": 4541 + }, + { + "epoch": 0.96, + "learning_rate": 2.082696688872554e-07, + "loss": 1.2078, + "step": 4542 + }, + { + "epoch": 0.96, + "learning_rate": 2.063128717548657e-07, + "loss": 1.2149, + "step": 4543 + }, + { + "epoch": 0.96, + "learning_rate": 2.043652629440307e-07, + "loss": 1.2556, + "step": 4544 + }, + { + "epoch": 0.96, + "learning_rate": 2.0242684335884056e-07, + "loss": 1.2607, + "step": 4545 + }, + { + "epoch": 0.96, + "learning_rate": 2.0049761389911772e-07, + "loss": 1.2898, + "step": 4546 + }, + { + "epoch": 0.96, + "learning_rate": 1.9857757546041912e-07, + "loss": 1.2345, + "step": 4547 + }, + { + "epoch": 0.96, + "learning_rate": 1.9666672893403627e-07, + "loss": 1.2257, + "step": 4548 + }, + { + "epoch": 0.96, + "learning_rate": 1.9476507520699518e-07, + "loss": 1.2238, + "step": 4549 + }, + { + "epoch": 0.96, + "learning_rate": 1.928726151620497e-07, + "loss": 1.2325, + "step": 4550 + }, + { + "epoch": 0.96, + "learning_rate": 1.9098934967768823e-07, + "loss": 1.2727, + "step": 4551 + }, + { + "epoch": 0.96, + "learning_rate": 1.891152796281337e-07, + "loss": 1.2301, + "step": 4552 + }, + { + "epoch": 0.96, + "learning_rate": 1.8725040588333466e-07, + "loss": 1.1936, + "step": 4553 + }, + { + "epoch": 0.96, + "learning_rate": 1.853947293089764e-07, + "loss": 1.2217, + "step": 4554 + }, + { + "epoch": 0.96, + "learning_rate": 1.8354825076647432e-07, + "loss": 1.2005, + "step": 4555 + }, + { + "epoch": 0.96, + "learning_rate": 1.817109711129672e-07, + "loss": 1.1929, + "step": 4556 + }, + { + "epoch": 0.96, + "learning_rate": 1.7988289120133507e-07, + "loss": 1.2421, + "step": 4557 + }, + { + "epoch": 0.96, + "learning_rate": 1.7806401188017463e-07, + "loss": 1.2203, + "step": 4558 + }, + { + "epoch": 0.96, + "learning_rate": 1.7625433399382386e-07, + "loss": 1.2239, + "step": 4559 + }, + { + "epoch": 0.96, + "learning_rate": 1.7445385838234185e-07, + "loss": 1.223, + "step": 4560 + }, + { + "epoch": 0.96, + "learning_rate": 1.7266258588151562e-07, + "loss": 1.1813, + "step": 4561 + }, + { + "epoch": 0.96, + "learning_rate": 1.7088051732286448e-07, + "loss": 1.2105, + "step": 4562 + }, + { + "epoch": 0.96, + "learning_rate": 1.6910765353363334e-07, + "loss": 1.2597, + "step": 4563 + }, + { + "epoch": 0.96, + "learning_rate": 1.6734399533679057e-07, + "loss": 1.237, + "step": 4564 + }, + { + "epoch": 0.96, + "learning_rate": 1.6558954355103686e-07, + "loss": 1.2297, + "step": 4565 + }, + { + "epoch": 0.96, + "learning_rate": 1.6384429899079624e-07, + "loss": 1.2204, + "step": 4566 + }, + { + "epoch": 0.96, + "learning_rate": 1.6210826246622068e-07, + "loss": 1.1876, + "step": 4567 + }, + { + "epoch": 0.96, + "learning_rate": 1.603814347831856e-07, + "loss": 1.2152, + "step": 4568 + }, + { + "epoch": 0.96, + "learning_rate": 1.586638167432919e-07, + "loss": 1.2398, + "step": 4569 + }, + { + "epoch": 0.96, + "learning_rate": 1.5695540914386632e-07, + "loss": 1.2265, + "step": 4570 + }, + { + "epoch": 0.96, + "learning_rate": 1.552562127779611e-07, + "loss": 1.1975, + "step": 4571 + }, + { + "epoch": 0.96, + "learning_rate": 1.5356622843434533e-07, + "loss": 1.1959, + "step": 4572 + }, + { + "epoch": 0.96, + "learning_rate": 1.51885456897527e-07, + "loss": 1.2192, + "step": 4573 + }, + { + "epoch": 0.96, + "learning_rate": 1.5021389894771753e-07, + "loss": 1.2153, + "step": 4574 + }, + { + "epoch": 0.96, + "learning_rate": 1.4855155536087184e-07, + "loss": 1.2368, + "step": 4575 + }, + { + "epoch": 0.96, + "learning_rate": 1.4689842690865042e-07, + "loss": 1.2154, + "step": 4576 + }, + { + "epoch": 0.96, + "learning_rate": 1.4525451435844608e-07, + "loss": 1.2416, + "step": 4577 + }, + { + "epoch": 0.96, + "learning_rate": 1.436198184733706e-07, + "loss": 1.1973, + "step": 4578 + }, + { + "epoch": 0.96, + "learning_rate": 1.4199434001225697e-07, + "loss": 1.2245, + "step": 4579 + }, + { + "epoch": 0.96, + "learning_rate": 1.4037807972966167e-07, + "loss": 1.2799, + "step": 4580 + }, + { + "epoch": 0.96, + "learning_rate": 1.387710383758556e-07, + "loss": 1.2327, + "step": 4581 + }, + { + "epoch": 0.96, + "learning_rate": 1.3717321669683981e-07, + "loss": 1.2359, + "step": 4582 + }, + { + "epoch": 0.96, + "learning_rate": 1.3558461543432767e-07, + "loss": 1.2137, + "step": 4583 + }, + { + "epoch": 0.96, + "learning_rate": 1.3400523532575592e-07, + "loss": 1.2329, + "step": 4584 + }, + { + "epoch": 0.96, + "learning_rate": 1.324350771042804e-07, + "loss": 1.2091, + "step": 4585 + }, + { + "epoch": 0.96, + "learning_rate": 1.3087414149877574e-07, + "loss": 1.2304, + "step": 4586 + }, + { + "epoch": 0.96, + "learning_rate": 1.2932242923383575e-07, + "loss": 1.2547, + "step": 4587 + }, + { + "epoch": 0.97, + "learning_rate": 1.277799410297731e-07, + "loss": 1.2672, + "step": 4588 + }, + { + "epoch": 0.97, + "learning_rate": 1.26246677602615e-07, + "loss": 1.2167, + "step": 4589 + }, + { + "epoch": 0.97, + "learning_rate": 1.2472263966411214e-07, + "loss": 1.2341, + "step": 4590 + }, + { + "epoch": 0.97, + "learning_rate": 1.2320782792173192e-07, + "loss": 1.2091, + "step": 4591 + }, + { + "epoch": 0.97, + "learning_rate": 1.2170224307865185e-07, + "loss": 1.2378, + "step": 4592 + }, + { + "epoch": 0.97, + "learning_rate": 1.2020588583377513e-07, + "loss": 1.2021, + "step": 4593 + }, + { + "epoch": 0.97, + "learning_rate": 1.187187568817172e-07, + "loss": 1.2016, + "step": 4594 + }, + { + "epoch": 0.97, + "learning_rate": 1.1724085691280806e-07, + "loss": 1.2125, + "step": 4595 + }, + { + "epoch": 0.97, + "learning_rate": 1.1577218661309896e-07, + "loss": 1.2236, + "step": 4596 + }, + { + "epoch": 0.97, + "learning_rate": 1.1431274666435121e-07, + "loss": 1.161, + "step": 4597 + }, + { + "epoch": 0.97, + "learning_rate": 1.1286253774404288e-07, + "loss": 1.2346, + "step": 4598 + }, + { + "epoch": 0.97, + "learning_rate": 1.11421560525371e-07, + "loss": 1.2394, + "step": 4599 + }, + { + "epoch": 0.97, + "learning_rate": 1.0998981567724276e-07, + "loss": 1.2812, + "step": 4600 + }, + { + "epoch": 0.97, + "learning_rate": 1.0856730386427983e-07, + "loss": 1.2264, + "step": 4601 + }, + { + "epoch": 0.97, + "learning_rate": 1.0715402574681843e-07, + "loss": 1.2406, + "step": 4602 + }, + { + "epoch": 0.97, + "learning_rate": 1.0574998198090935e-07, + "loss": 1.2552, + "step": 4603 + }, + { + "epoch": 0.97, + "learning_rate": 1.0435517321831568e-07, + "loss": 1.1945, + "step": 4604 + }, + { + "epoch": 0.97, + "learning_rate": 1.0296960010651725e-07, + "loss": 1.1916, + "step": 4605 + }, + { + "epoch": 0.97, + "learning_rate": 1.0159326328869734e-07, + "loss": 1.1895, + "step": 4606 + }, + { + "epoch": 0.97, + "learning_rate": 1.0022616340376489e-07, + "loss": 1.2183, + "step": 4607 + }, + { + "epoch": 0.97, + "learning_rate": 9.886830108632784e-08, + "loss": 1.209, + "step": 4608 + }, + { + "epoch": 0.97, + "learning_rate": 9.751967696671749e-08, + "loss": 1.2673, + "step": 4609 + }, + { + "epoch": 0.97, + "learning_rate": 9.618029167096865e-08, + "loss": 1.271, + "step": 4610 + }, + { + "epoch": 0.97, + "learning_rate": 9.485014582083063e-08, + "loss": 1.1878, + "step": 4611 + }, + { + "epoch": 0.97, + "learning_rate": 9.352924003376285e-08, + "loss": 1.2163, + "step": 4612 + }, + { + "epoch": 0.97, + "learning_rate": 9.221757492293704e-08, + "loss": 1.2079, + "step": 4613 + }, + { + "epoch": 0.97, + "learning_rate": 9.091515109723281e-08, + "loss": 1.2322, + "step": 4614 + }, + { + "epoch": 0.97, + "learning_rate": 8.96219691612421e-08, + "loss": 1.213, + "step": 4615 + }, + { + "epoch": 0.97, + "learning_rate": 8.833802971526472e-08, + "loss": 1.2398, + "step": 4616 + }, + { + "epoch": 0.97, + "learning_rate": 8.706333335531503e-08, + "loss": 1.2366, + "step": 4617 + }, + { + "epoch": 0.97, + "learning_rate": 8.579788067310858e-08, + "loss": 1.2867, + "step": 4618 + }, + { + "epoch": 0.97, + "learning_rate": 8.454167225607768e-08, + "loss": 1.2004, + "step": 4619 + }, + { + "epoch": 0.97, + "learning_rate": 8.32947086873559e-08, + "loss": 1.235, + "step": 4620 + }, + { + "epoch": 0.97, + "learning_rate": 8.205699054579575e-08, + "loss": 1.2393, + "step": 4621 + }, + { + "epoch": 0.97, + "learning_rate": 8.082851840594652e-08, + "loss": 1.2185, + "step": 4622 + }, + { + "epoch": 0.97, + "learning_rate": 7.960929283807429e-08, + "loss": 1.2763, + "step": 4623 + }, + { + "epoch": 0.97, + "learning_rate": 7.83993144081463e-08, + "loss": 1.1988, + "step": 4624 + }, + { + "epoch": 0.97, + "learning_rate": 7.719858367784216e-08, + "loss": 1.2469, + "step": 4625 + }, + { + "epoch": 0.97, + "learning_rate": 7.600710120454491e-08, + "loss": 1.2057, + "step": 4626 + }, + { + "epoch": 0.97, + "learning_rate": 7.482486754134765e-08, + "loss": 1.2969, + "step": 4627 + }, + { + "epoch": 0.97, + "learning_rate": 7.365188323704919e-08, + "loss": 1.2414, + "step": 4628 + }, + { + "epoch": 0.97, + "learning_rate": 7.248814883615174e-08, + "loss": 1.2522, + "step": 4629 + }, + { + "epoch": 0.97, + "learning_rate": 7.133366487886762e-08, + "loss": 1.2131, + "step": 4630 + }, + { + "epoch": 0.97, + "learning_rate": 7.018843190111479e-08, + "loss": 1.2519, + "step": 4631 + }, + { + "epoch": 0.97, + "learning_rate": 6.90524504345147e-08, + "loss": 1.2755, + "step": 4632 + }, + { + "epoch": 0.97, + "learning_rate": 6.792572100639661e-08, + "loss": 1.2914, + "step": 4633 + }, + { + "epoch": 0.97, + "learning_rate": 6.680824413979103e-08, + "loss": 1.2579, + "step": 4634 + }, + { + "epoch": 0.97, + "learning_rate": 6.570002035343636e-08, + "loss": 1.2256, + "step": 4635 + }, + { + "epoch": 0.98, + "learning_rate": 6.460105016177887e-08, + "loss": 1.2272, + "step": 4636 + }, + { + "epoch": 0.98, + "learning_rate": 6.351133407495936e-08, + "loss": 1.2768, + "step": 4637 + }, + { + "epoch": 0.98, + "learning_rate": 6.24308725988354e-08, + "loss": 1.1901, + "step": 4638 + }, + { + "epoch": 0.98, + "learning_rate": 6.135966623495915e-08, + "loss": 1.2421, + "step": 4639 + }, + { + "epoch": 0.98, + "learning_rate": 6.029771548058838e-08, + "loss": 1.2004, + "step": 4640 + }, + { + "epoch": 0.98, + "learning_rate": 5.924502082868655e-08, + "loss": 1.2288, + "step": 4641 + }, + { + "epoch": 0.98, + "learning_rate": 5.820158276792054e-08, + "loss": 1.2435, + "step": 4642 + }, + { + "epoch": 0.98, + "learning_rate": 5.716740178266067e-08, + "loss": 1.2091, + "step": 4643 + }, + { + "epoch": 0.98, + "learning_rate": 5.614247835297404e-08, + "loss": 1.1938, + "step": 4644 + }, + { + "epoch": 0.98, + "learning_rate": 5.512681295463784e-08, + "loss": 1.2725, + "step": 4645 + }, + { + "epoch": 0.98, + "learning_rate": 5.4120406059128274e-08, + "loss": 1.2059, + "step": 4646 + }, + { + "epoch": 0.98, + "learning_rate": 5.312325813362274e-08, + "loss": 1.2319, + "step": 4647 + }, + { + "epoch": 0.98, + "learning_rate": 5.21353696410043e-08, + "loss": 1.1882, + "step": 4648 + }, + { + "epoch": 0.98, + "learning_rate": 5.11567410398528e-08, + "loss": 1.221, + "step": 4649 + }, + { + "epoch": 0.98, + "learning_rate": 5.0187372784453734e-08, + "loss": 1.2138, + "step": 4650 + }, + { + "epoch": 0.98, + "learning_rate": 4.922726532479383e-08, + "loss": 1.2108, + "step": 4651 + }, + { + "epoch": 0.98, + "learning_rate": 4.827641910655656e-08, + "loss": 1.2147, + "step": 4652 + }, + { + "epoch": 0.98, + "learning_rate": 4.7334834571128866e-08, + "loss": 1.2249, + "step": 4653 + }, + { + "epoch": 0.98, + "learning_rate": 4.640251215560332e-08, + "loss": 1.1814, + "step": 4654 + }, + { + "epoch": 0.98, + "learning_rate": 4.547945229276263e-08, + "loss": 1.2164, + "step": 4655 + }, + { + "epoch": 0.98, + "learning_rate": 4.456565541109958e-08, + "loss": 1.187, + "step": 4656 + }, + { + "epoch": 0.98, + "learning_rate": 4.366112193480154e-08, + "loss": 1.1888, + "step": 4657 + }, + { + "epoch": 0.98, + "learning_rate": 4.276585228375485e-08, + "loss": 1.2154, + "step": 4658 + }, + { + "epoch": 0.98, + "learning_rate": 4.187984687355151e-08, + "loss": 1.1806, + "step": 4659 + }, + { + "epoch": 0.98, + "learning_rate": 4.100310611547809e-08, + "loss": 1.2458, + "step": 4660 + }, + { + "epoch": 0.98, + "learning_rate": 4.013563041652013e-08, + "loss": 1.2249, + "step": 4661 + }, + { + "epoch": 0.98, + "learning_rate": 3.927742017936664e-08, + "loss": 1.2553, + "step": 4662 + }, + { + "epoch": 0.98, + "learning_rate": 3.8428475802398944e-08, + "loss": 1.2188, + "step": 4663 + }, + { + "epoch": 0.98, + "learning_rate": 3.7588797679706245e-08, + "loss": 1.2438, + "step": 4664 + }, + { + "epoch": 0.98, + "learning_rate": 3.6758386201065645e-08, + "loss": 1.2461, + "step": 4665 + }, + { + "epoch": 0.98, + "learning_rate": 3.5937241751962115e-08, + "loss": 1.2565, + "step": 4666 + }, + { + "epoch": 0.98, + "learning_rate": 3.5125364713572976e-08, + "loss": 1.2366, + "step": 4667 + }, + { + "epoch": 0.98, + "learning_rate": 3.4322755462774525e-08, + "loss": 1.1382, + "step": 4668 + }, + { + "epoch": 0.98, + "learning_rate": 3.3529414372142074e-08, + "loss": 1.2868, + "step": 4669 + }, + { + "epoch": 0.98, + "learning_rate": 3.2745341809949923e-08, + "loss": 1.233, + "step": 4670 + }, + { + "epoch": 0.98, + "learning_rate": 3.1970538140166927e-08, + "loss": 1.2067, + "step": 4671 + }, + { + "epoch": 0.98, + "learning_rate": 3.1205003722460935e-08, + "loss": 1.2252, + "step": 4672 + }, + { + "epoch": 0.98, + "learning_rate": 3.0448738912196574e-08, + "loss": 1.2462, + "step": 4673 + }, + { + "epoch": 0.98, + "learning_rate": 2.9701744060435246e-08, + "loss": 1.2097, + "step": 4674 + }, + { + "epoch": 0.98, + "learning_rate": 2.8964019513935126e-08, + "loss": 1.2165, + "step": 4675 + }, + { + "epoch": 0.98, + "learning_rate": 2.8235565615151172e-08, + "loss": 1.1683, + "step": 4676 + }, + { + "epoch": 0.98, + "learning_rate": 2.7516382702235112e-08, + "loss": 1.1979, + "step": 4677 + }, + { + "epoch": 0.98, + "learning_rate": 2.6806471109037668e-08, + "loss": 1.2402, + "step": 4678 + }, + { + "epoch": 0.98, + "learning_rate": 2.6105831165099683e-08, + "loss": 1.2255, + "step": 4679 + }, + { + "epoch": 0.98, + "learning_rate": 2.541446319566321e-08, + "loss": 1.2091, + "step": 4680 + }, + { + "epoch": 0.98, + "learning_rate": 2.473236752166264e-08, + "loss": 1.209, + "step": 4681 + }, + { + "epoch": 0.98, + "learning_rate": 2.4059544459731356e-08, + "loss": 1.1749, + "step": 4682 + }, + { + "epoch": 0.99, + "learning_rate": 2.3395994322199522e-08, + "loss": 1.18, + "step": 4683 + }, + { + "epoch": 0.99, + "learning_rate": 2.2741717417085196e-08, + "loss": 1.2378, + "step": 4684 + }, + { + "epoch": 0.99, + "learning_rate": 2.2096714048109867e-08, + "loss": 1.2202, + "step": 4685 + }, + { + "epoch": 0.99, + "learning_rate": 2.1460984514685145e-08, + "loss": 1.2915, + "step": 4686 + }, + { + "epoch": 0.99, + "learning_rate": 2.083452911192163e-08, + "loss": 1.2164, + "step": 4687 + }, + { + "epoch": 0.99, + "learning_rate": 2.021734813062226e-08, + "loss": 1.2087, + "step": 4688 + }, + { + "epoch": 0.99, + "learning_rate": 1.960944185728675e-08, + "loss": 1.1995, + "step": 4689 + }, + { + "epoch": 0.99, + "learning_rate": 1.9010810574102702e-08, + "loss": 1.1979, + "step": 4690 + }, + { + "epoch": 0.99, + "learning_rate": 1.842145455896338e-08, + "loss": 1.2474, + "step": 4691 + }, + { + "epoch": 0.99, + "learning_rate": 1.7841374085447728e-08, + "loss": 1.2469, + "step": 4692 + }, + { + "epoch": 0.99, + "learning_rate": 1.727056942283367e-08, + "loss": 1.2645, + "step": 4693 + }, + { + "epoch": 0.99, + "learning_rate": 1.6709040836089262e-08, + "loss": 1.2161, + "step": 4694 + }, + { + "epoch": 0.99, + "learning_rate": 1.6156788585879325e-08, + "loss": 1.2441, + "step": 4695 + }, + { + "epoch": 0.99, + "learning_rate": 1.5613812928563233e-08, + "loss": 1.2384, + "step": 4696 + }, + { + "epoch": 0.99, + "learning_rate": 1.508011411619048e-08, + "loss": 1.2884, + "step": 4697 + }, + { + "epoch": 0.99, + "learning_rate": 1.4555692396509557e-08, + "loss": 1.2231, + "step": 4698 + }, + { + "epoch": 0.99, + "learning_rate": 1.4040548012956844e-08, + "loss": 1.1984, + "step": 4699 + }, + { + "epoch": 0.99, + "learning_rate": 1.3534681204665502e-08, + "loss": 1.2186, + "step": 4700 + }, + { + "epoch": 0.99, + "learning_rate": 1.3038092206461017e-08, + "loss": 1.2536, + "step": 4701 + }, + { + "epoch": 0.99, + "learning_rate": 1.2550781248863442e-08, + "loss": 1.2366, + "step": 4702 + }, + { + "epoch": 0.99, + "learning_rate": 1.2072748558082936e-08, + "loss": 1.2491, + "step": 4703 + }, + { + "epoch": 0.99, + "learning_rate": 1.1603994356026437e-08, + "loss": 1.2115, + "step": 4704 + }, + { + "epoch": 0.99, + "learning_rate": 1.1144518860290998e-08, + "loss": 1.2219, + "step": 4705 + }, + { + "epoch": 0.99, + "learning_rate": 1.0694322284166003e-08, + "loss": 1.2137, + "step": 4706 + }, + { + "epoch": 0.99, + "learning_rate": 1.0253404836637615e-08, + "loss": 1.1968, + "step": 4707 + }, + { + "epoch": 0.99, + "learning_rate": 9.821766722379888e-09, + "loss": 1.2349, + "step": 4708 + }, + { + "epoch": 0.99, + "learning_rate": 9.399408141761434e-09, + "loss": 1.185, + "step": 4709 + }, + { + "epoch": 0.99, + "learning_rate": 8.9863292908432e-09, + "loss": 1.258, + "step": 4710 + }, + { + "epoch": 0.99, + "learning_rate": 8.58253036137846e-09, + "loss": 1.2634, + "step": 4711 + }, + { + "epoch": 0.99, + "learning_rate": 8.188011540812834e-09, + "loss": 1.2651, + "step": 4712 + }, + { + "epoch": 0.99, + "learning_rate": 7.80277301228205e-09, + "loss": 1.2436, + "step": 4713 + }, + { + "epoch": 0.99, + "learning_rate": 7.426814954618611e-09, + "loss": 1.2377, + "step": 4714 + }, + { + "epoch": 0.99, + "learning_rate": 7.060137542340695e-09, + "loss": 1.2328, + "step": 4715 + }, + { + "epoch": 0.99, + "learning_rate": 6.702740945663256e-09, + "loss": 1.1509, + "step": 4716 + }, + { + "epoch": 0.99, + "learning_rate": 6.35462533049358e-09, + "loss": 1.2448, + "step": 4717 + }, + { + "epoch": 0.99, + "learning_rate": 6.0157908584246305e-09, + "loss": 1.2704, + "step": 4718 + }, + { + "epoch": 0.99, + "learning_rate": 5.686237686746143e-09, + "loss": 1.2092, + "step": 4719 + }, + { + "epoch": 0.99, + "learning_rate": 5.36596596844019e-09, + "loss": 1.2177, + "step": 4720 + }, + { + "epoch": 0.99, + "learning_rate": 5.054975852176736e-09, + "loss": 1.2601, + "step": 4721 + }, + { + "epoch": 0.99, + "learning_rate": 4.7532674823203e-09, + "loss": 1.2601, + "step": 4722 + }, + { + "epoch": 0.99, + "learning_rate": 4.4608409989232995e-09, + "loss": 1.2555, + "step": 4723 + }, + { + "epoch": 0.99, + "learning_rate": 4.17769653773048e-09, + "loss": 1.2481, + "step": 4724 + }, + { + "epoch": 0.99, + "learning_rate": 3.903834230183368e-09, + "loss": 1.2643, + "step": 4725 + }, + { + "epoch": 0.99, + "learning_rate": 3.639254203406939e-09, + "loss": 1.2755, + "step": 4726 + }, + { + "epoch": 0.99, + "learning_rate": 3.383956580218506e-09, + "loss": 1.1891, + "step": 4727 + }, + { + "epoch": 0.99, + "learning_rate": 3.1379414791343766e-09, + "loss": 1.2494, + "step": 4728 + }, + { + "epoch": 0.99, + "learning_rate": 2.9012090143498704e-09, + "loss": 1.2551, + "step": 4729 + }, + { + "epoch": 0.99, + "learning_rate": 2.6737592957615243e-09, + "loss": 1.1741, + "step": 4730 + }, + { + "epoch": 1.0, + "learning_rate": 2.4555924289493272e-09, + "loss": 1.2429, + "step": 4731 + }, + { + "epoch": 1.0, + "learning_rate": 2.2467085151900436e-09, + "loss": 1.2265, + "step": 4732 + }, + { + "epoch": 1.0, + "learning_rate": 2.047107651446112e-09, + "loss": 1.2828, + "step": 4733 + }, + { + "epoch": 1.0, + "learning_rate": 1.8567899303767457e-09, + "loss": 1.2395, + "step": 4734 + }, + { + "epoch": 1.0, + "learning_rate": 1.6757554403223907e-09, + "loss": 1.2633, + "step": 4735 + }, + { + "epoch": 1.0, + "learning_rate": 1.5040042653269304e-09, + "loss": 1.2112, + "step": 4736 + }, + { + "epoch": 1.0, + "learning_rate": 1.3415364851132595e-09, + "loss": 1.2111, + "step": 4737 + }, + { + "epoch": 1.0, + "learning_rate": 1.188352175103269e-09, + "loss": 1.2249, + "step": 4738 + }, + { + "epoch": 1.0, + "learning_rate": 1.0444514064023025e-09, + "loss": 1.2669, + "step": 4739 + }, + { + "epoch": 1.0, + "learning_rate": 9.098342458102593e-10, + "loss": 1.2549, + "step": 4740 + }, + { + "epoch": 1.0, + "learning_rate": 7.845007558193729e-10, + "loss": 1.243, + "step": 4741 + }, + { + "epoch": 1.0, + "learning_rate": 6.684509946075501e-10, + "loss": 1.1938, + "step": 4742 + }, + { + "epoch": 1.0, + "learning_rate": 5.616850160494736e-10, + "loss": 1.2334, + "step": 4743 + }, + { + "epoch": 1.0, + "learning_rate": 4.642028697010581e-10, + "loss": 1.2111, + "step": 4744 + }, + { + "epoch": 1.0, + "learning_rate": 3.760046008172147e-10, + "loss": 1.2216, + "step": 4745 + }, + { + "epoch": 1.0, + "learning_rate": 2.97090250340748e-10, + "loss": 1.2322, + "step": 4746 + }, + { + "epoch": 1.0, + "learning_rate": 2.2745985490235656e-10, + "loss": 1.1637, + "step": 4747 + }, + { + "epoch": 1.0, + "learning_rate": 1.6711344682507346e-10, + "loss": 1.2659, + "step": 4748 + }, + { + "epoch": 1.0, + "learning_rate": 1.160510541220461e-10, + "loss": 1.2167, + "step": 4749 + }, + { + "epoch": 1.0, + "learning_rate": 7.427270049653601e-11, + "loss": 1.2163, + "step": 4750 + }, + { + "epoch": 1.0, + "learning_rate": 4.1778405341919016e-11, + "loss": 1.2346, + "step": 4751 + }, + { + "epoch": 1.0, + "learning_rate": 1.8568183743905566e-11, + "loss": 1.2219, + "step": 4752 + }, + { + "epoch": 1.0, + "learning_rate": 4.6420464738794465e-12, + "loss": 1.2163, + "step": 4753 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 1.1564, + "step": 4754 + }, + { + "epoch": 1.0, + "step": 4754, + "total_flos": 1.1295276544628886e+23, + "train_loss": 1.2829506933864647, + "train_runtime": 87797.1342, + "train_samples_per_second": 6.931, + "train_steps_per_second": 0.054 + } + ], + "logging_steps": 1.0, + "max_steps": 4754, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5000, + "total_flos": 1.1295276544628886e+23, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}