{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 4927, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.7567567567567575e-06, "loss": 8.3828, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.3513513513513515e-05, "loss": 8.4141, "step": 2 }, { "epoch": 0.0, "learning_rate": 2.0270270270270273e-05, "loss": 8.0742, "step": 3 }, { "epoch": 0.0, "learning_rate": 2.702702702702703e-05, "loss": 6.5039, "step": 4 }, { "epoch": 0.0, "learning_rate": 3.3783783783783784e-05, "loss": 5.9336, "step": 5 }, { "epoch": 0.0, "learning_rate": 4.0540540540540545e-05, "loss": 5.7773, "step": 6 }, { "epoch": 0.0, "learning_rate": 4.72972972972973e-05, "loss": 5.5625, "step": 7 }, { "epoch": 0.0, "learning_rate": 5.405405405405406e-05, "loss": 5.418, "step": 8 }, { "epoch": 0.0, "learning_rate": 6.0810810810810814e-05, "loss": 5.3477, "step": 9 }, { "epoch": 0.0, "learning_rate": 6.756756756756757e-05, "loss": 5.2227, "step": 10 }, { "epoch": 0.0, "learning_rate": 7.432432432432433e-05, "loss": 5.1172, "step": 11 }, { "epoch": 0.0, "learning_rate": 8.108108108108109e-05, "loss": 5.0039, "step": 12 }, { "epoch": 0.0, "learning_rate": 8.783783783783784e-05, "loss": 4.8867, "step": 13 }, { "epoch": 0.0, "learning_rate": 9.45945945945946e-05, "loss": 4.793, "step": 14 }, { "epoch": 0.0, "learning_rate": 0.00010135135135135136, "loss": 4.7617, "step": 15 }, { "epoch": 0.0, "learning_rate": 0.00010810810810810812, "loss": 4.668, "step": 16 }, { "epoch": 0.0, "learning_rate": 0.00011486486486486487, "loss": 4.5, "step": 17 }, { "epoch": 0.0, "learning_rate": 0.00012162162162162163, "loss": 4.5703, "step": 18 }, { "epoch": 0.0, "learning_rate": 0.00012837837837837836, "loss": 4.4297, "step": 19 }, { "epoch": 0.0, "learning_rate": 0.00013513513513513514, "loss": 4.3594, "step": 20 }, { "epoch": 0.0, "learning_rate": 0.00014189189189189188, "loss": 4.3672, "step": 21 }, { "epoch": 0.0, "learning_rate": 0.00014864864864864866, "loss": 4.2012, "step": 22 }, { "epoch": 0.0, "learning_rate": 0.0001554054054054054, "loss": 4.1562, "step": 23 }, { "epoch": 0.0, "learning_rate": 0.00016216216216216218, "loss": 4.082, "step": 24 }, { "epoch": 0.01, "learning_rate": 0.00016891891891891893, "loss": 4.0234, "step": 25 }, { "epoch": 0.01, "learning_rate": 0.00017567567567567568, "loss": 3.9902, "step": 26 }, { "epoch": 0.01, "learning_rate": 0.00018243243243243242, "loss": 3.8652, "step": 27 }, { "epoch": 0.01, "learning_rate": 0.0001891891891891892, "loss": 3.8164, "step": 28 }, { "epoch": 0.01, "learning_rate": 0.00019594594594594594, "loss": 3.6934, "step": 29 }, { "epoch": 0.01, "learning_rate": 0.00020270270270270272, "loss": 3.5957, "step": 30 }, { "epoch": 0.01, "learning_rate": 0.00020945945945945947, "loss": 3.7227, "step": 31 }, { "epoch": 0.01, "learning_rate": 0.00021621621621621624, "loss": 3.5996, "step": 32 }, { "epoch": 0.01, "learning_rate": 0.000222972972972973, "loss": 3.5293, "step": 33 }, { "epoch": 0.01, "learning_rate": 0.00022972972972972974, "loss": 3.5391, "step": 34 }, { "epoch": 0.01, "learning_rate": 0.00023648648648648648, "loss": 3.5, "step": 35 }, { "epoch": 0.01, "learning_rate": 0.00024324324324324326, "loss": 3.2871, "step": 36 }, { "epoch": 0.01, "learning_rate": 0.00025, "loss": 3.4434, "step": 37 }, { "epoch": 0.01, "learning_rate": 0.0002567567567567567, "loss": 3.3789, "step": 38 }, { "epoch": 0.01, "learning_rate": 0.0002635135135135135, "loss": 3.3965, "step": 39 }, { "epoch": 0.01, "learning_rate": 0.0002702702702702703, "loss": 3.3652, "step": 40 }, { "epoch": 0.01, "learning_rate": 0.00027702702702702705, "loss": 3.3418, "step": 41 }, { "epoch": 0.01, "learning_rate": 0.00028378378378378377, "loss": 3.2324, "step": 42 }, { "epoch": 0.01, "learning_rate": 0.00029054054054054054, "loss": 3.1895, "step": 43 }, { "epoch": 0.01, "learning_rate": 0.0002972972972972973, "loss": 3.1895, "step": 44 }, { "epoch": 0.01, "learning_rate": 0.00030405405405405404, "loss": 3.2168, "step": 45 }, { "epoch": 0.01, "learning_rate": 0.0003108108108108108, "loss": 3.1504, "step": 46 }, { "epoch": 0.01, "learning_rate": 0.00031756756756756753, "loss": 3.2188, "step": 47 }, { "epoch": 0.01, "learning_rate": 0.00032432432432432436, "loss": 3.207, "step": 48 }, { "epoch": 0.01, "learning_rate": 0.0003310810810810811, "loss": 3.1289, "step": 49 }, { "epoch": 0.01, "learning_rate": 0.00033783783783783786, "loss": 3.0703, "step": 50 }, { "epoch": 0.01, "learning_rate": 0.0003445945945945946, "loss": 3.0898, "step": 51 }, { "epoch": 0.01, "learning_rate": 0.00035135135135135135, "loss": 3.1211, "step": 52 }, { "epoch": 0.01, "learning_rate": 0.0003581081081081081, "loss": 3.0371, "step": 53 }, { "epoch": 0.01, "learning_rate": 0.00036486486486486485, "loss": 3.1016, "step": 54 }, { "epoch": 0.01, "learning_rate": 0.0003716216216216216, "loss": 3.0625, "step": 55 }, { "epoch": 0.01, "learning_rate": 0.0003783783783783784, "loss": 3.0195, "step": 56 }, { "epoch": 0.01, "learning_rate": 0.00038513513513513517, "loss": 3.0039, "step": 57 }, { "epoch": 0.01, "learning_rate": 0.0003918918918918919, "loss": 2.9277, "step": 58 }, { "epoch": 0.01, "learning_rate": 0.00039864864864864866, "loss": 2.9668, "step": 59 }, { "epoch": 0.01, "learning_rate": 0.00040540540540540544, "loss": 2.998, "step": 60 }, { "epoch": 0.01, "learning_rate": 0.00041216216216216216, "loss": 3.0215, "step": 61 }, { "epoch": 0.01, "learning_rate": 0.00041891891891891893, "loss": 3.084, "step": 62 }, { "epoch": 0.01, "learning_rate": 0.00042567567567567565, "loss": 2.9883, "step": 63 }, { "epoch": 0.01, "learning_rate": 0.0004324324324324325, "loss": 2.8887, "step": 64 }, { "epoch": 0.01, "learning_rate": 0.0004391891891891892, "loss": 3.0293, "step": 65 }, { "epoch": 0.01, "learning_rate": 0.000445945945945946, "loss": 2.8965, "step": 66 }, { "epoch": 0.01, "learning_rate": 0.0004527027027027027, "loss": 2.8457, "step": 67 }, { "epoch": 0.01, "learning_rate": 0.00045945945945945947, "loss": 2.9082, "step": 68 }, { "epoch": 0.01, "learning_rate": 0.00046621621621621625, "loss": 2.9062, "step": 69 }, { "epoch": 0.01, "learning_rate": 0.00047297297297297297, "loss": 2.998, "step": 70 }, { "epoch": 0.01, "learning_rate": 0.00047972972972972974, "loss": 2.918, "step": 71 }, { "epoch": 0.01, "learning_rate": 0.0004864864864864865, "loss": 2.8926, "step": 72 }, { "epoch": 0.01, "learning_rate": 0.0004932432432432432, "loss": 2.8945, "step": 73 }, { "epoch": 0.02, "learning_rate": 0.0005, "loss": 2.8789, "step": 74 }, { "epoch": 0.02, "learning_rate": 0.0005067567567567568, "loss": 2.8145, "step": 75 }, { "epoch": 0.02, "learning_rate": 0.0005135135135135135, "loss": 2.8945, "step": 76 }, { "epoch": 0.02, "learning_rate": 0.0005202702702702703, "loss": 2.8828, "step": 77 }, { "epoch": 0.02, "learning_rate": 0.000527027027027027, "loss": 2.8711, "step": 78 }, { "epoch": 0.02, "learning_rate": 0.0005337837837837838, "loss": 2.8926, "step": 79 }, { "epoch": 0.02, "learning_rate": 0.0005405405405405405, "loss": 2.877, "step": 80 }, { "epoch": 0.02, "learning_rate": 0.0005472972972972973, "loss": 2.9023, "step": 81 }, { "epoch": 0.02, "learning_rate": 0.0005540540540540541, "loss": 2.9434, "step": 82 }, { "epoch": 0.02, "learning_rate": 0.0005608108108108109, "loss": 2.8262, "step": 83 }, { "epoch": 0.02, "learning_rate": 0.0005675675675675675, "loss": 2.916, "step": 84 }, { "epoch": 0.02, "learning_rate": 0.0005743243243243243, "loss": 2.8281, "step": 85 }, { "epoch": 0.02, "learning_rate": 0.0005810810810810811, "loss": 2.832, "step": 86 }, { "epoch": 0.02, "learning_rate": 0.0005878378378378379, "loss": 2.8262, "step": 87 }, { "epoch": 0.02, "learning_rate": 0.0005945945945945946, "loss": 2.7988, "step": 88 }, { "epoch": 0.02, "learning_rate": 0.0006013513513513513, "loss": 2.7969, "step": 89 }, { "epoch": 0.02, "learning_rate": 0.0006081081081081081, "loss": 2.8555, "step": 90 }, { "epoch": 0.02, "learning_rate": 0.000614864864864865, "loss": 2.8887, "step": 91 }, { "epoch": 0.02, "learning_rate": 0.0006216216216216216, "loss": 2.8184, "step": 92 }, { "epoch": 0.02, "learning_rate": 0.0006283783783783784, "loss": 2.8516, "step": 93 }, { "epoch": 0.02, "learning_rate": 0.0006351351351351351, "loss": 2.9199, "step": 94 }, { "epoch": 0.02, "learning_rate": 0.000641891891891892, "loss": 2.8496, "step": 95 }, { "epoch": 0.02, "learning_rate": 0.0006486486486486487, "loss": 2.8203, "step": 96 }, { "epoch": 0.02, "learning_rate": 0.0006554054054054054, "loss": 2.8359, "step": 97 }, { "epoch": 0.02, "learning_rate": 0.0006621621621621622, "loss": 2.8086, "step": 98 }, { "epoch": 0.02, "learning_rate": 0.0006689189189189189, "loss": 2.8613, "step": 99 }, { "epoch": 0.02, "learning_rate": 0.0006756756756756757, "loss": 2.9062, "step": 100 }, { "epoch": 0.02, "learning_rate": 0.0006824324324324325, "loss": 2.8594, "step": 101 }, { "epoch": 0.02, "learning_rate": 0.0006891891891891892, "loss": 2.8477, "step": 102 }, { "epoch": 0.02, "learning_rate": 0.0006959459459459459, "loss": 2.7422, "step": 103 }, { "epoch": 0.02, "learning_rate": 0.0007027027027027027, "loss": 2.8516, "step": 104 }, { "epoch": 0.02, "learning_rate": 0.0007094594594594595, "loss": 2.7891, "step": 105 }, { "epoch": 0.02, "learning_rate": 0.0007162162162162163, "loss": 2.8535, "step": 106 }, { "epoch": 0.02, "learning_rate": 0.000722972972972973, "loss": 2.793, "step": 107 }, { "epoch": 0.02, "learning_rate": 0.0007297297297297297, "loss": 2.7754, "step": 108 }, { "epoch": 0.02, "learning_rate": 0.0007364864864864866, "loss": 2.7773, "step": 109 }, { "epoch": 0.02, "learning_rate": 0.0007432432432432432, "loss": 2.7773, "step": 110 }, { "epoch": 0.02, "learning_rate": 0.00075, "loss": 2.8105, "step": 111 }, { "epoch": 0.02, "learning_rate": 0.0007567567567567568, "loss": 2.7266, "step": 112 }, { "epoch": 0.02, "learning_rate": 0.0007635135135135135, "loss": 2.7441, "step": 113 }, { "epoch": 0.02, "learning_rate": 0.0007702702702702703, "loss": 2.7598, "step": 114 }, { "epoch": 0.02, "learning_rate": 0.000777027027027027, "loss": 2.7578, "step": 115 }, { "epoch": 0.02, "learning_rate": 0.0007837837837837838, "loss": 2.7891, "step": 116 }, { "epoch": 0.02, "learning_rate": 0.0007905405405405406, "loss": 2.7363, "step": 117 }, { "epoch": 0.02, "learning_rate": 0.0007972972972972973, "loss": 2.7109, "step": 118 }, { "epoch": 0.02, "learning_rate": 0.0008040540540540541, "loss": 2.6582, "step": 119 }, { "epoch": 0.02, "learning_rate": 0.0008108108108108109, "loss": 2.6191, "step": 120 }, { "epoch": 0.02, "learning_rate": 0.0008175675675675675, "loss": 2.7969, "step": 121 }, { "epoch": 0.02, "learning_rate": 0.0008243243243243243, "loss": 2.6816, "step": 122 }, { "epoch": 0.02, "learning_rate": 0.0008310810810810811, "loss": 2.6738, "step": 123 }, { "epoch": 0.03, "learning_rate": 0.0008378378378378379, "loss": 2.7422, "step": 124 }, { "epoch": 0.03, "learning_rate": 0.0008445945945945946, "loss": 2.7754, "step": 125 }, { "epoch": 0.03, "learning_rate": 0.0008513513513513513, "loss": 2.7754, "step": 126 }, { "epoch": 0.03, "learning_rate": 0.0008581081081081081, "loss": 2.7363, "step": 127 }, { "epoch": 0.03, "learning_rate": 0.000864864864864865, "loss": 2.7129, "step": 128 }, { "epoch": 0.03, "learning_rate": 0.0008716216216216216, "loss": 2.7617, "step": 129 }, { "epoch": 0.03, "learning_rate": 0.0008783783783783784, "loss": 2.6523, "step": 130 }, { "epoch": 0.03, "learning_rate": 0.0008851351351351351, "loss": 2.6895, "step": 131 }, { "epoch": 0.03, "learning_rate": 0.000891891891891892, "loss": 2.7559, "step": 132 }, { "epoch": 0.03, "learning_rate": 0.0008986486486486487, "loss": 2.6621, "step": 133 }, { "epoch": 0.03, "learning_rate": 0.0009054054054054054, "loss": 2.7109, "step": 134 }, { "epoch": 0.03, "learning_rate": 0.0009121621621621622, "loss": 2.7344, "step": 135 }, { "epoch": 0.03, "learning_rate": 0.0009189189189189189, "loss": 2.6172, "step": 136 }, { "epoch": 0.03, "learning_rate": 0.0009256756756756757, "loss": 2.6523, "step": 137 }, { "epoch": 0.03, "learning_rate": 0.0009324324324324325, "loss": 2.6113, "step": 138 }, { "epoch": 0.03, "learning_rate": 0.0009391891891891892, "loss": 2.7227, "step": 139 }, { "epoch": 0.03, "learning_rate": 0.0009459459459459459, "loss": 2.6836, "step": 140 }, { "epoch": 0.03, "learning_rate": 0.0009527027027027027, "loss": 2.6348, "step": 141 }, { "epoch": 0.03, "learning_rate": 0.0009594594594594595, "loss": 2.6953, "step": 142 }, { "epoch": 0.03, "learning_rate": 0.0009662162162162163, "loss": 2.6699, "step": 143 }, { "epoch": 0.03, "learning_rate": 0.000972972972972973, "loss": 2.7266, "step": 144 }, { "epoch": 0.03, "learning_rate": 0.0009797297297297297, "loss": 2.6309, "step": 145 }, { "epoch": 0.03, "learning_rate": 0.0009864864864864865, "loss": 2.6719, "step": 146 }, { "epoch": 0.03, "learning_rate": 0.0009932432432432432, "loss": 2.6602, "step": 147 }, { "epoch": 0.03, "learning_rate": 0.001, "loss": 2.5957, "step": 148 }, { "epoch": 0.03, "learning_rate": 0.0009999998919647012, "loss": 2.6289, "step": 149 }, { "epoch": 0.03, "learning_rate": 0.0009999995678588516, "loss": 2.6777, "step": 150 }, { "epoch": 0.03, "learning_rate": 0.000999999027682591, "loss": 2.5996, "step": 151 }, { "epoch": 0.03, "learning_rate": 0.000999998271436153, "loss": 2.6289, "step": 152 }, { "epoch": 0.03, "learning_rate": 0.0009999972991198646, "loss": 2.6875, "step": 153 }, { "epoch": 0.03, "learning_rate": 0.0009999961107341458, "loss": 2.6055, "step": 154 }, { "epoch": 0.03, "learning_rate": 0.0009999947062795098, "loss": 2.7168, "step": 155 }, { "epoch": 0.03, "learning_rate": 0.0009999930857565642, "loss": 2.6406, "step": 156 }, { "epoch": 0.03, "learning_rate": 0.0009999912491660088, "loss": 2.6055, "step": 157 }, { "epoch": 0.03, "learning_rate": 0.0009999891965086374, "loss": 2.5625, "step": 158 }, { "epoch": 0.03, "learning_rate": 0.000999986927785337, "loss": 2.6055, "step": 159 }, { "epoch": 0.03, "learning_rate": 0.0009999844429970884, "loss": 2.5684, "step": 160 }, { "epoch": 0.03, "learning_rate": 0.0009999817421449649, "loss": 2.6328, "step": 161 }, { "epoch": 0.03, "learning_rate": 0.0009999788252301337, "loss": 2.5801, "step": 162 }, { "epoch": 0.03, "learning_rate": 0.0009999756922538553, "loss": 2.6172, "step": 163 }, { "epoch": 0.03, "learning_rate": 0.000999972343217484, "loss": 2.5938, "step": 164 }, { "epoch": 0.03, "learning_rate": 0.0009999687781224664, "loss": 2.5703, "step": 165 }, { "epoch": 0.03, "learning_rate": 0.0009999649969703436, "loss": 2.6836, "step": 166 }, { "epoch": 0.03, "learning_rate": 0.0009999609997627496, "loss": 2.5938, "step": 167 }, { "epoch": 0.03, "learning_rate": 0.0009999567865014117, "loss": 2.5781, "step": 168 }, { "epoch": 0.03, "learning_rate": 0.0009999523571881503, "loss": 2.5469, "step": 169 }, { "epoch": 0.03, "learning_rate": 0.0009999477118248798, "loss": 2.5684, "step": 170 }, { "epoch": 0.03, "learning_rate": 0.0009999428504136078, "loss": 2.6465, "step": 171 }, { "epoch": 0.03, "learning_rate": 0.0009999377729564348, "loss": 2.5332, "step": 172 }, { "epoch": 0.04, "learning_rate": 0.0009999324794555552, "loss": 2.5508, "step": 173 }, { "epoch": 0.04, "learning_rate": 0.000999926969913256, "loss": 2.5645, "step": 174 }, { "epoch": 0.04, "learning_rate": 0.000999921244331919, "loss": 2.5801, "step": 175 }, { "epoch": 0.04, "learning_rate": 0.0009999153027140178, "loss": 2.5137, "step": 176 }, { "epoch": 0.04, "learning_rate": 0.0009999091450621203, "loss": 2.5273, "step": 177 }, { "epoch": 0.04, "learning_rate": 0.0009999027713788872, "loss": 2.5508, "step": 178 }, { "epoch": 0.04, "learning_rate": 0.0009998961816670732, "loss": 2.6016, "step": 179 }, { "epoch": 0.04, "learning_rate": 0.0009998893759295257, "loss": 2.5508, "step": 180 }, { "epoch": 0.04, "learning_rate": 0.0009998823541691858, "loss": 2.5371, "step": 181 }, { "epoch": 0.04, "learning_rate": 0.0009998751163890882, "loss": 2.502, "step": 182 }, { "epoch": 0.04, "learning_rate": 0.00099986766259236, "loss": 2.5781, "step": 183 }, { "epoch": 0.04, "learning_rate": 0.000999859992782223, "loss": 2.4941, "step": 184 }, { "epoch": 0.04, "learning_rate": 0.0009998521069619912, "loss": 2.4473, "step": 185 }, { "epoch": 0.04, "learning_rate": 0.0009998440051350724, "loss": 2.5234, "step": 186 }, { "epoch": 0.04, "learning_rate": 0.0009998356873049678, "loss": 2.5625, "step": 187 }, { "epoch": 0.04, "learning_rate": 0.000999827153475272, "loss": 2.4727, "step": 188 }, { "epoch": 0.04, "learning_rate": 0.000999818403649673, "loss": 2.4863, "step": 189 }, { "epoch": 0.04, "learning_rate": 0.0009998094378319514, "loss": 2.5352, "step": 190 }, { "epoch": 0.04, "learning_rate": 0.000999800256025982, "loss": 2.5449, "step": 191 }, { "epoch": 0.04, "learning_rate": 0.0009997908582357324, "loss": 2.6094, "step": 192 }, { "epoch": 0.04, "learning_rate": 0.0009997812444652644, "loss": 2.5039, "step": 193 }, { "epoch": 0.04, "learning_rate": 0.000999771414718732, "loss": 2.5547, "step": 194 }, { "epoch": 0.04, "learning_rate": 0.000999761369000383, "loss": 2.5469, "step": 195 }, { "epoch": 0.04, "learning_rate": 0.0009997511073145586, "loss": 2.5371, "step": 196 }, { "epoch": 0.04, "learning_rate": 0.0009997406296656938, "loss": 2.5039, "step": 197 }, { "epoch": 0.04, "learning_rate": 0.0009997299360583157, "loss": 2.5449, "step": 198 }, { "epoch": 0.04, "learning_rate": 0.000999719026497046, "loss": 2.5, "step": 199 }, { "epoch": 0.04, "learning_rate": 0.0009997079009865988, "loss": 2.459, "step": 200 }, { "epoch": 0.04, "learning_rate": 0.000999696559531782, "loss": 2.4961, "step": 201 }, { "epoch": 0.04, "learning_rate": 0.0009996850021374967, "loss": 2.541, "step": 202 }, { "epoch": 0.04, "learning_rate": 0.0009996732288087375, "loss": 2.5117, "step": 203 }, { "epoch": 0.04, "learning_rate": 0.0009996612395505922, "loss": 2.4629, "step": 204 }, { "epoch": 0.04, "learning_rate": 0.0009996490343682413, "loss": 2.4219, "step": 205 }, { "epoch": 0.04, "learning_rate": 0.00099963661326696, "loss": 2.4922, "step": 206 }, { "epoch": 0.04, "learning_rate": 0.000999623976252115, "loss": 2.5449, "step": 207 }, { "epoch": 0.04, "learning_rate": 0.0009996111233291681, "loss": 2.4512, "step": 208 }, { "epoch": 0.04, "learning_rate": 0.000999598054503673, "loss": 2.5371, "step": 209 }, { "epoch": 0.04, "learning_rate": 0.0009995847697812778, "loss": 2.4766, "step": 210 }, { "epoch": 0.04, "learning_rate": 0.000999571269167723, "loss": 2.4707, "step": 211 }, { "epoch": 0.04, "learning_rate": 0.0009995575526688426, "loss": 2.541, "step": 212 }, { "epoch": 0.04, "learning_rate": 0.0009995436202905649, "loss": 2.4922, "step": 213 }, { "epoch": 0.04, "learning_rate": 0.0009995294720389096, "loss": 2.5156, "step": 214 }, { "epoch": 0.04, "learning_rate": 0.0009995151079199916, "loss": 2.502, "step": 215 }, { "epoch": 0.04, "learning_rate": 0.0009995005279400176, "loss": 2.4258, "step": 216 }, { "epoch": 0.04, "learning_rate": 0.0009994857321052885, "loss": 2.4902, "step": 217 }, { "epoch": 0.04, "learning_rate": 0.0009994707204221984, "loss": 2.4473, "step": 218 }, { "epoch": 0.04, "learning_rate": 0.000999455492897234, "loss": 2.4785, "step": 219 }, { "epoch": 0.04, "learning_rate": 0.000999440049536976, "loss": 2.459, "step": 220 }, { "epoch": 0.04, "learning_rate": 0.000999424390348098, "loss": 2.5645, "step": 221 }, { "epoch": 0.05, "learning_rate": 0.0009994085153373674, "loss": 2.4902, "step": 222 }, { "epoch": 0.05, "learning_rate": 0.000999392424511644, "loss": 2.4883, "step": 223 }, { "epoch": 0.05, "learning_rate": 0.0009993761178778812, "loss": 2.4316, "step": 224 }, { "epoch": 0.05, "learning_rate": 0.0009993595954431262, "loss": 2.5195, "step": 225 }, { "epoch": 0.05, "learning_rate": 0.0009993428572145186, "loss": 2.3906, "step": 226 }, { "epoch": 0.05, "learning_rate": 0.000999325903199292, "loss": 2.4023, "step": 227 }, { "epoch": 0.05, "learning_rate": 0.000999308733404773, "loss": 2.4102, "step": 228 }, { "epoch": 0.05, "learning_rate": 0.000999291347838381, "loss": 2.5625, "step": 229 }, { "epoch": 0.05, "learning_rate": 0.0009992737465076293, "loss": 2.4707, "step": 230 }, { "epoch": 0.05, "learning_rate": 0.000999255929420124, "loss": 2.4961, "step": 231 }, { "epoch": 0.05, "learning_rate": 0.0009992378965835646, "loss": 2.4102, "step": 232 }, { "epoch": 0.05, "learning_rate": 0.000999219648005744, "loss": 2.459, "step": 233 }, { "epoch": 0.05, "learning_rate": 0.0009992011836945482, "loss": 2.4434, "step": 234 }, { "epoch": 0.05, "learning_rate": 0.000999182503657956, "loss": 2.4551, "step": 235 }, { "epoch": 0.05, "learning_rate": 0.0009991636079040402, "loss": 2.3945, "step": 236 }, { "epoch": 0.05, "learning_rate": 0.0009991444964409664, "loss": 2.4277, "step": 237 }, { "epoch": 0.05, "learning_rate": 0.0009991251692769933, "loss": 2.4668, "step": 238 }, { "epoch": 0.05, "learning_rate": 0.000999105626420473, "loss": 2.5176, "step": 239 }, { "epoch": 0.05, "learning_rate": 0.0009990858678798507, "loss": 2.4609, "step": 240 }, { "epoch": 0.05, "learning_rate": 0.000999065893663665, "loss": 2.3457, "step": 241 }, { "epoch": 0.05, "learning_rate": 0.0009990457037805476, "loss": 2.3809, "step": 242 }, { "epoch": 0.05, "learning_rate": 0.0009990252982392234, "loss": 2.4883, "step": 243 }, { "epoch": 0.05, "learning_rate": 0.0009990046770485103, "loss": 2.4648, "step": 244 }, { "epoch": 0.05, "learning_rate": 0.0009989838402173197, "loss": 2.4141, "step": 245 }, { "epoch": 0.05, "learning_rate": 0.000998962787754656, "loss": 2.4395, "step": 246 }, { "epoch": 0.05, "learning_rate": 0.000998941519669617, "loss": 2.4766, "step": 247 }, { "epoch": 0.05, "learning_rate": 0.0009989200359713931, "loss": 2.4668, "step": 248 }, { "epoch": 0.05, "learning_rate": 0.0009988983366692689, "loss": 2.4316, "step": 249 }, { "epoch": 0.05, "learning_rate": 0.0009988764217726208, "loss": 2.3633, "step": 250 }, { "epoch": 0.05, "learning_rate": 0.00099885429129092, "loss": 2.4277, "step": 251 }, { "epoch": 0.05, "learning_rate": 0.0009988319452337293, "loss": 2.4297, "step": 252 }, { "epoch": 0.05, "learning_rate": 0.0009988093836107057, "loss": 2.4238, "step": 253 }, { "epoch": 0.05, "learning_rate": 0.000998786606431599, "loss": 2.4219, "step": 254 }, { "epoch": 0.05, "learning_rate": 0.000998763613706252, "loss": 2.4277, "step": 255 }, { "epoch": 0.05, "learning_rate": 0.0009987404054446008, "loss": 2.3789, "step": 256 }, { "epoch": 0.05, "learning_rate": 0.0009987169816566748, "loss": 2.4551, "step": 257 }, { "epoch": 0.05, "learning_rate": 0.000998693342352596, "loss": 2.4258, "step": 258 }, { "epoch": 0.05, "learning_rate": 0.0009986694875425807, "loss": 2.3887, "step": 259 }, { "epoch": 0.05, "learning_rate": 0.0009986454172369369, "loss": 2.3789, "step": 260 }, { "epoch": 0.05, "learning_rate": 0.0009986211314460664, "loss": 2.3809, "step": 261 }, { "epoch": 0.05, "learning_rate": 0.0009985966301804643, "loss": 2.3555, "step": 262 }, { "epoch": 0.05, "learning_rate": 0.0009985719134507185, "loss": 2.4219, "step": 263 }, { "epoch": 0.05, "learning_rate": 0.0009985469812675103, "loss": 2.4043, "step": 264 }, { "epoch": 0.05, "learning_rate": 0.0009985218336416137, "loss": 2.4082, "step": 265 }, { "epoch": 0.05, "learning_rate": 0.000998496470583896, "loss": 2.459, "step": 266 }, { "epoch": 0.05, "learning_rate": 0.0009984708921053178, "loss": 2.4297, "step": 267 }, { "epoch": 0.05, "learning_rate": 0.0009984450982169326, "loss": 2.4922, "step": 268 }, { "epoch": 0.05, "learning_rate": 0.0009984190889298868, "loss": 2.4043, "step": 269 }, { "epoch": 0.05, "learning_rate": 0.0009983928642554203, "loss": 2.4238, "step": 270 }, { "epoch": 0.06, "learning_rate": 0.0009983664242048658, "loss": 2.4062, "step": 271 }, { "epoch": 0.06, "learning_rate": 0.0009983397687896491, "loss": 2.4199, "step": 272 }, { "epoch": 0.06, "learning_rate": 0.0009983128980212892, "loss": 2.4453, "step": 273 }, { "epoch": 0.06, "learning_rate": 0.000998285811911398, "loss": 2.457, "step": 274 }, { "epoch": 0.06, "learning_rate": 0.0009982585104716805, "loss": 2.4492, "step": 275 }, { "epoch": 0.06, "learning_rate": 0.0009982309937139344, "loss": 2.4512, "step": 276 }, { "epoch": 0.06, "learning_rate": 0.0009982032616500517, "loss": 2.459, "step": 277 }, { "epoch": 0.06, "learning_rate": 0.0009981753142920158, "loss": 2.4121, "step": 278 }, { "epoch": 0.06, "learning_rate": 0.0009981471516519044, "loss": 2.4258, "step": 279 }, { "epoch": 0.06, "learning_rate": 0.0009981187737418873, "loss": 2.4316, "step": 280 }, { "epoch": 0.06, "learning_rate": 0.0009980901805742282, "loss": 2.4824, "step": 281 }, { "epoch": 0.06, "learning_rate": 0.000998061372161283, "loss": 2.4023, "step": 282 }, { "epoch": 0.06, "learning_rate": 0.0009980323485155012, "loss": 2.3711, "step": 283 }, { "epoch": 0.06, "learning_rate": 0.0009980031096494253, "loss": 2.3418, "step": 284 }, { "epoch": 0.06, "learning_rate": 0.0009979736555756902, "loss": 2.3652, "step": 285 }, { "epoch": 0.06, "learning_rate": 0.0009979439863070244, "loss": 2.4023, "step": 286 }, { "epoch": 0.06, "learning_rate": 0.0009979141018562495, "loss": 2.4434, "step": 287 }, { "epoch": 0.06, "learning_rate": 0.0009978840022362792, "loss": 2.4824, "step": 288 }, { "epoch": 0.06, "learning_rate": 0.0009978536874601213, "loss": 2.4648, "step": 289 }, { "epoch": 0.06, "learning_rate": 0.0009978231575408759, "loss": 2.332, "step": 290 }, { "epoch": 0.06, "learning_rate": 0.000997792412491736, "loss": 2.4219, "step": 291 }, { "epoch": 0.06, "learning_rate": 0.0009977614523259883, "loss": 2.334, "step": 292 }, { "epoch": 0.06, "learning_rate": 0.0009977302770570115, "loss": 2.3633, "step": 293 }, { "epoch": 0.06, "learning_rate": 0.0009976988866982782, "loss": 2.4824, "step": 294 }, { "epoch": 0.06, "learning_rate": 0.000997667281263353, "loss": 2.4375, "step": 295 }, { "epoch": 0.06, "learning_rate": 0.000997635460765894, "loss": 2.4434, "step": 296 }, { "epoch": 0.06, "learning_rate": 0.0009976034252196524, "loss": 2.3633, "step": 297 }, { "epoch": 0.06, "learning_rate": 0.0009975711746384717, "loss": 2.334, "step": 298 }, { "epoch": 0.06, "learning_rate": 0.0009975387090362892, "loss": 2.3633, "step": 299 }, { "epoch": 0.06, "learning_rate": 0.000997506028427134, "loss": 2.4043, "step": 300 }, { "epoch": 0.06, "learning_rate": 0.0009974731328251294, "loss": 2.4551, "step": 301 }, { "epoch": 0.06, "learning_rate": 0.0009974400222444904, "loss": 2.4473, "step": 302 }, { "epoch": 0.06, "learning_rate": 0.0009974066966995257, "loss": 2.3496, "step": 303 }, { "epoch": 0.06, "learning_rate": 0.0009973731562046366, "loss": 2.4082, "step": 304 }, { "epoch": 0.06, "learning_rate": 0.0009973394007743175, "loss": 2.3398, "step": 305 }, { "epoch": 0.06, "learning_rate": 0.0009973054304231552, "loss": 2.4727, "step": 306 }, { "epoch": 0.06, "learning_rate": 0.00099727124516583, "loss": 2.4121, "step": 307 }, { "epoch": 0.06, "learning_rate": 0.0009972368450171145, "loss": 2.4414, "step": 308 }, { "epoch": 0.06, "learning_rate": 0.0009972022299918745, "loss": 2.3867, "step": 309 }, { "epoch": 0.06, "learning_rate": 0.0009971674001050686, "loss": 2.4062, "step": 310 }, { "epoch": 0.06, "learning_rate": 0.0009971323553717483, "loss": 2.3535, "step": 311 }, { "epoch": 0.06, "learning_rate": 0.0009970970958070576, "loss": 2.3496, "step": 312 }, { "epoch": 0.06, "learning_rate": 0.000997061621426234, "loss": 2.3711, "step": 313 }, { "epoch": 0.06, "learning_rate": 0.0009970259322446072, "loss": 2.4238, "step": 314 }, { "epoch": 0.06, "learning_rate": 0.0009969900282776, "loss": 2.3379, "step": 315 }, { "epoch": 0.06, "learning_rate": 0.000996953909540728, "loss": 2.3867, "step": 316 }, { "epoch": 0.06, "learning_rate": 0.0009969175760495997, "loss": 2.3867, "step": 317 }, { "epoch": 0.06, "learning_rate": 0.000996881027819916, "loss": 2.375, "step": 318 }, { "epoch": 0.06, "learning_rate": 0.0009968442648674713, "loss": 2.4141, "step": 319 }, { "epoch": 0.06, "learning_rate": 0.000996807287208152, "loss": 2.4316, "step": 320 }, { "epoch": 0.07, "learning_rate": 0.0009967700948579378, "loss": 2.459, "step": 321 }, { "epoch": 0.07, "learning_rate": 0.0009967326878329015, "loss": 2.4336, "step": 322 }, { "epoch": 0.07, "learning_rate": 0.0009966950661492073, "loss": 2.3457, "step": 323 }, { "epoch": 0.07, "learning_rate": 0.0009966572298231139, "loss": 2.3555, "step": 324 }, { "epoch": 0.07, "learning_rate": 0.0009966191788709714, "loss": 2.3379, "step": 325 }, { "epoch": 0.07, "learning_rate": 0.0009965809133092236, "loss": 2.3789, "step": 326 }, { "epoch": 0.07, "learning_rate": 0.0009965424331544064, "loss": 2.3633, "step": 327 }, { "epoch": 0.07, "learning_rate": 0.0009965037384231487, "loss": 2.3398, "step": 328 }, { "epoch": 0.07, "learning_rate": 0.000996464829132172, "loss": 2.3887, "step": 329 }, { "epoch": 0.07, "learning_rate": 0.0009964257052982907, "loss": 2.4199, "step": 330 }, { "epoch": 0.07, "learning_rate": 0.0009963863669384119, "loss": 2.4512, "step": 331 }, { "epoch": 0.07, "learning_rate": 0.000996346814069535, "loss": 2.3711, "step": 332 }, { "epoch": 0.07, "learning_rate": 0.0009963070467087528, "loss": 2.4238, "step": 333 }, { "epoch": 0.07, "learning_rate": 0.0009962670648732503, "loss": 2.3984, "step": 334 }, { "epoch": 0.07, "learning_rate": 0.0009962268685803053, "loss": 2.3594, "step": 335 }, { "epoch": 0.07, "learning_rate": 0.0009961864578472883, "loss": 2.3809, "step": 336 }, { "epoch": 0.07, "learning_rate": 0.0009961458326916623, "loss": 2.3086, "step": 337 }, { "epoch": 0.07, "learning_rate": 0.000996104993130983, "loss": 2.3711, "step": 338 }, { "epoch": 0.07, "learning_rate": 0.0009960639391828994, "loss": 2.3672, "step": 339 }, { "epoch": 0.07, "learning_rate": 0.0009960226708651521, "loss": 2.4043, "step": 340 }, { "epoch": 0.07, "learning_rate": 0.000995981188195575, "loss": 2.3828, "step": 341 }, { "epoch": 0.07, "learning_rate": 0.0009959394911920943, "loss": 2.334, "step": 342 }, { "epoch": 0.07, "learning_rate": 0.0009958975798727294, "loss": 2.3262, "step": 343 }, { "epoch": 0.07, "learning_rate": 0.0009958554542555916, "loss": 2.3984, "step": 344 }, { "epoch": 0.07, "learning_rate": 0.000995813114358885, "loss": 2.3945, "step": 345 }, { "epoch": 0.07, "learning_rate": 0.0009957705602009067, "loss": 2.4023, "step": 346 }, { "epoch": 0.07, "learning_rate": 0.000995727791800046, "loss": 2.3379, "step": 347 }, { "epoch": 0.07, "learning_rate": 0.0009956848091747848, "loss": 2.3613, "step": 348 }, { "epoch": 0.07, "learning_rate": 0.000995641612343698, "loss": 2.4043, "step": 349 }, { "epoch": 0.07, "learning_rate": 0.0009955982013254523, "loss": 2.457, "step": 350 }, { "epoch": 0.07, "learning_rate": 0.0009955545761388073, "loss": 2.3184, "step": 351 }, { "epoch": 0.07, "learning_rate": 0.000995510736802616, "loss": 2.3926, "step": 352 }, { "epoch": 0.07, "learning_rate": 0.0009954666833358224, "loss": 2.4277, "step": 353 }, { "epoch": 0.07, "learning_rate": 0.000995422415757464, "loss": 2.332, "step": 354 }, { "epoch": 0.07, "learning_rate": 0.000995377934086671, "loss": 2.3223, "step": 355 }, { "epoch": 0.07, "learning_rate": 0.0009953332383426654, "loss": 2.3457, "step": 356 }, { "epoch": 0.07, "learning_rate": 0.0009952883285447623, "loss": 2.3887, "step": 357 }, { "epoch": 0.07, "learning_rate": 0.0009952432047123688, "loss": 2.3926, "step": 358 }, { "epoch": 0.07, "learning_rate": 0.000995197866864985, "loss": 2.3809, "step": 359 }, { "epoch": 0.07, "learning_rate": 0.000995152315022203, "loss": 2.3398, "step": 360 }, { "epoch": 0.07, "learning_rate": 0.000995106549203708, "loss": 2.3438, "step": 361 }, { "epoch": 0.07, "learning_rate": 0.000995060569429277, "loss": 2.3848, "step": 362 }, { "epoch": 0.07, "learning_rate": 0.0009950143757187799, "loss": 2.3438, "step": 363 }, { "epoch": 0.07, "learning_rate": 0.000994967968092179, "loss": 2.3008, "step": 364 }, { "epoch": 0.07, "learning_rate": 0.0009949213465695287, "loss": 2.3066, "step": 365 }, { "epoch": 0.07, "learning_rate": 0.000994874511170976, "loss": 2.3418, "step": 366 }, { "epoch": 0.07, "learning_rate": 0.0009948274619167607, "loss": 2.5137, "step": 367 }, { "epoch": 0.07, "learning_rate": 0.0009947801988272147, "loss": 2.2285, "step": 368 }, { "epoch": 0.07, "learning_rate": 0.0009947327219227621, "loss": 2.4102, "step": 369 }, { "epoch": 0.08, "learning_rate": 0.0009946850312239199, "loss": 2.3301, "step": 370 }, { "epoch": 0.08, "learning_rate": 0.0009946371267512972, "loss": 2.3398, "step": 371 }, { "epoch": 0.08, "learning_rate": 0.000994589008525595, "loss": 2.3418, "step": 372 }, { "epoch": 0.08, "learning_rate": 0.0009945406765676078, "loss": 2.3398, "step": 373 }, { "epoch": 0.08, "learning_rate": 0.0009944921308982218, "loss": 2.3066, "step": 374 }, { "epoch": 0.08, "learning_rate": 0.000994443371538415, "loss": 2.291, "step": 375 }, { "epoch": 0.08, "learning_rate": 0.0009943943985092588, "loss": 2.3242, "step": 376 }, { "epoch": 0.08, "learning_rate": 0.0009943452118319164, "loss": 2.2852, "step": 377 }, { "epoch": 0.08, "learning_rate": 0.0009942958115276433, "loss": 2.334, "step": 378 }, { "epoch": 0.08, "learning_rate": 0.0009942461976177874, "loss": 2.3008, "step": 379 }, { "epoch": 0.08, "learning_rate": 0.000994196370123789, "loss": 2.3145, "step": 380 }, { "epoch": 0.08, "learning_rate": 0.0009941463290671806, "loss": 2.3496, "step": 381 }, { "epoch": 0.08, "learning_rate": 0.0009940960744695868, "loss": 2.3184, "step": 382 }, { "epoch": 0.08, "learning_rate": 0.000994045606352725, "loss": 2.3535, "step": 383 }, { "epoch": 0.08, "learning_rate": 0.0009939949247384046, "loss": 2.3281, "step": 384 }, { "epoch": 0.08, "learning_rate": 0.0009939440296485268, "loss": 2.3184, "step": 385 }, { "epoch": 0.08, "learning_rate": 0.0009938929211050856, "loss": 2.3984, "step": 386 }, { "epoch": 0.08, "learning_rate": 0.0009938415991301674, "loss": 2.2988, "step": 387 }, { "epoch": 0.08, "learning_rate": 0.0009937900637459502, "loss": 2.291, "step": 388 }, { "epoch": 0.08, "learning_rate": 0.0009937383149747048, "loss": 2.3164, "step": 389 }, { "epoch": 0.08, "learning_rate": 0.000993686352838794, "loss": 2.2695, "step": 390 }, { "epoch": 0.08, "learning_rate": 0.0009936341773606723, "loss": 2.2578, "step": 391 }, { "epoch": 0.08, "learning_rate": 0.0009935817885628874, "loss": 2.3574, "step": 392 }, { "epoch": 0.08, "learning_rate": 0.0009935291864680783, "loss": 2.3047, "step": 393 }, { "epoch": 0.08, "learning_rate": 0.0009934763710989771, "loss": 2.2793, "step": 394 }, { "epoch": 0.08, "learning_rate": 0.000993423342478407, "loss": 2.3105, "step": 395 }, { "epoch": 0.08, "learning_rate": 0.0009933701006292837, "loss": 2.3262, "step": 396 }, { "epoch": 0.08, "learning_rate": 0.0009933166455746158, "loss": 2.3613, "step": 397 }, { "epoch": 0.08, "learning_rate": 0.0009932629773375028, "loss": 2.2852, "step": 398 }, { "epoch": 0.08, "learning_rate": 0.0009932090959411375, "loss": 2.3262, "step": 399 }, { "epoch": 0.08, "learning_rate": 0.000993155001408804, "loss": 2.3848, "step": 400 }, { "epoch": 0.08, "learning_rate": 0.0009931006937638786, "loss": 2.2871, "step": 401 }, { "epoch": 0.08, "learning_rate": 0.00099304617302983, "loss": 2.375, "step": 402 }, { "epoch": 0.08, "learning_rate": 0.0009929914392302192, "loss": 2.3398, "step": 403 }, { "epoch": 0.08, "learning_rate": 0.0009929364923886986, "loss": 2.2695, "step": 404 }, { "epoch": 0.08, "learning_rate": 0.000992881332529013, "loss": 2.373, "step": 405 }, { "epoch": 0.08, "learning_rate": 0.0009928259596749993, "loss": 2.3828, "step": 406 }, { "epoch": 0.08, "learning_rate": 0.0009927703738505861, "loss": 2.2715, "step": 407 }, { "epoch": 0.08, "learning_rate": 0.000992714575079795, "loss": 2.2832, "step": 408 }, { "epoch": 0.08, "learning_rate": 0.0009926585633867385, "loss": 2.2715, "step": 409 }, { "epoch": 0.08, "learning_rate": 0.0009926023387956215, "loss": 2.3086, "step": 410 }, { "epoch": 0.08, "learning_rate": 0.0009925459013307412, "loss": 2.3242, "step": 411 }, { "epoch": 0.08, "learning_rate": 0.0009924892510164863, "loss": 2.3301, "step": 412 }, { "epoch": 0.08, "learning_rate": 0.0009924323878773378, "loss": 2.3105, "step": 413 }, { "epoch": 0.08, "learning_rate": 0.000992375311937869, "loss": 2.2969, "step": 414 }, { "epoch": 0.08, "learning_rate": 0.000992318023222744, "loss": 2.3613, "step": 415 }, { "epoch": 0.08, "learning_rate": 0.0009922605217567203, "loss": 2.3242, "step": 416 }, { "epoch": 0.08, "learning_rate": 0.0009922028075646464, "loss": 2.3457, "step": 417 }, { "epoch": 0.08, "learning_rate": 0.000992144880671463, "loss": 2.2715, "step": 418 }, { "epoch": 0.09, "learning_rate": 0.0009920867411022025, "loss": 2.3359, "step": 419 }, { "epoch": 0.09, "learning_rate": 0.0009920283888819898, "loss": 2.3457, "step": 420 }, { "epoch": 0.09, "learning_rate": 0.0009919698240360409, "loss": 2.2637, "step": 421 }, { "epoch": 0.09, "learning_rate": 0.0009919110465896643, "loss": 2.2676, "step": 422 }, { "epoch": 0.09, "learning_rate": 0.0009918520565682604, "loss": 2.4023, "step": 423 }, { "epoch": 0.09, "learning_rate": 0.0009917928539973206, "loss": 2.3242, "step": 424 }, { "epoch": 0.09, "learning_rate": 0.0009917334389024292, "loss": 2.3164, "step": 425 }, { "epoch": 0.09, "learning_rate": 0.0009916738113092617, "loss": 2.2578, "step": 426 }, { "epoch": 0.09, "learning_rate": 0.000991613971243586, "loss": 2.3691, "step": 427 }, { "epoch": 0.09, "learning_rate": 0.000991553918731261, "loss": 2.332, "step": 428 }, { "epoch": 0.09, "learning_rate": 0.0009914936537982382, "loss": 2.3691, "step": 429 }, { "epoch": 0.09, "learning_rate": 0.0009914331764705605, "loss": 2.2793, "step": 430 }, { "epoch": 0.09, "learning_rate": 0.0009913724867743623, "loss": 2.2539, "step": 431 }, { "epoch": 0.09, "learning_rate": 0.0009913115847358708, "loss": 2.3457, "step": 432 }, { "epoch": 0.09, "learning_rate": 0.0009912504703814037, "loss": 2.3398, "step": 433 }, { "epoch": 0.09, "learning_rate": 0.000991189143737371, "loss": 2.3164, "step": 434 }, { "epoch": 0.09, "learning_rate": 0.000991127604830275, "loss": 2.3613, "step": 435 }, { "epoch": 0.09, "learning_rate": 0.0009910658536867085, "loss": 2.2715, "step": 436 }, { "epoch": 0.09, "learning_rate": 0.0009910038903333575, "loss": 2.2871, "step": 437 }, { "epoch": 0.09, "learning_rate": 0.000990941714796998, "loss": 2.3203, "step": 438 }, { "epoch": 0.09, "learning_rate": 0.0009908793271044996, "loss": 2.3457, "step": 439 }, { "epoch": 0.09, "learning_rate": 0.000990816727282822, "loss": 2.3418, "step": 440 }, { "epoch": 0.09, "learning_rate": 0.0009907539153590173, "loss": 2.293, "step": 441 }, { "epoch": 0.09, "learning_rate": 0.000990690891360229, "loss": 2.1924, "step": 442 }, { "epoch": 0.09, "learning_rate": 0.0009906276553136924, "loss": 2.3984, "step": 443 }, { "epoch": 0.09, "learning_rate": 0.0009905642072467345, "loss": 2.3496, "step": 444 }, { "epoch": 0.09, "learning_rate": 0.0009905005471867738, "loss": 2.3301, "step": 445 }, { "epoch": 0.09, "learning_rate": 0.0009904366751613204, "loss": 2.3223, "step": 446 }, { "epoch": 0.09, "learning_rate": 0.000990372591197976, "loss": 2.3086, "step": 447 }, { "epoch": 0.09, "learning_rate": 0.000990308295324434, "loss": 2.2871, "step": 448 }, { "epoch": 0.09, "learning_rate": 0.0009902437875684794, "loss": 2.3242, "step": 449 }, { "epoch": 0.09, "learning_rate": 0.0009901790679579883, "loss": 2.3301, "step": 450 }, { "epoch": 0.09, "learning_rate": 0.0009901141365209292, "loss": 2.3281, "step": 451 }, { "epoch": 0.09, "learning_rate": 0.0009900489932853612, "loss": 2.3203, "step": 452 }, { "epoch": 0.09, "learning_rate": 0.0009899836382794357, "loss": 2.332, "step": 453 }, { "epoch": 0.09, "learning_rate": 0.000989918071531395, "loss": 2.3379, "step": 454 }, { "epoch": 0.09, "learning_rate": 0.0009898522930695732, "loss": 2.3008, "step": 455 }, { "epoch": 0.09, "learning_rate": 0.0009897863029223962, "loss": 2.2812, "step": 456 }, { "epoch": 0.09, "learning_rate": 0.000989720101118381, "loss": 2.3066, "step": 457 }, { "epoch": 0.09, "learning_rate": 0.000989653687686136, "loss": 2.3105, "step": 458 }, { "epoch": 0.09, "learning_rate": 0.000989587062654361, "loss": 2.2754, "step": 459 }, { "epoch": 0.09, "learning_rate": 0.0009895202260518476, "loss": 2.3203, "step": 460 }, { "epoch": 0.09, "learning_rate": 0.0009894531779074788, "loss": 2.2871, "step": 461 }, { "epoch": 0.09, "learning_rate": 0.0009893859182502286, "loss": 2.2891, "step": 462 }, { "epoch": 0.09, "learning_rate": 0.0009893184471091632, "loss": 2.3301, "step": 463 }, { "epoch": 0.09, "learning_rate": 0.0009892507645134388, "loss": 2.3301, "step": 464 }, { "epoch": 0.09, "learning_rate": 0.0009891828704923044, "loss": 2.3047, "step": 465 }, { "epoch": 0.09, "learning_rate": 0.0009891147650750998, "loss": 2.334, "step": 466 }, { "epoch": 0.09, "learning_rate": 0.000989046448291256, "loss": 2.293, "step": 467 }, { "epoch": 0.09, "learning_rate": 0.0009889779201702955, "loss": 2.2344, "step": 468 }, { "epoch": 0.1, "learning_rate": 0.0009889091807418321, "loss": 2.2598, "step": 469 }, { "epoch": 0.1, "learning_rate": 0.0009888402300355713, "loss": 2.3066, "step": 470 }, { "epoch": 0.1, "learning_rate": 0.000988771068081309, "loss": 2.3574, "step": 471 }, { "epoch": 0.1, "learning_rate": 0.0009887016949089334, "loss": 2.3359, "step": 472 }, { "epoch": 0.1, "learning_rate": 0.000988632110548423, "loss": 2.3047, "step": 473 }, { "epoch": 0.1, "learning_rate": 0.0009885623150298487, "loss": 2.3027, "step": 474 }, { "epoch": 0.1, "learning_rate": 0.0009884923083833715, "loss": 2.2129, "step": 475 }, { "epoch": 0.1, "learning_rate": 0.0009884220906392442, "loss": 2.2812, "step": 476 }, { "epoch": 0.1, "learning_rate": 0.000988351661827811, "loss": 2.3574, "step": 477 }, { "epoch": 0.1, "learning_rate": 0.0009882810219795071, "loss": 2.3535, "step": 478 }, { "epoch": 0.1, "learning_rate": 0.0009882101711248586, "loss": 2.3652, "step": 479 }, { "epoch": 0.1, "learning_rate": 0.0009881391092944835, "loss": 2.2695, "step": 480 }, { "epoch": 0.1, "learning_rate": 0.0009880678365190901, "loss": 2.2812, "step": 481 }, { "epoch": 0.1, "learning_rate": 0.0009879963528294786, "loss": 2.3555, "step": 482 }, { "epoch": 0.1, "learning_rate": 0.00098792465825654, "loss": 2.3086, "step": 483 }, { "epoch": 0.1, "learning_rate": 0.0009878527528312563, "loss": 2.2207, "step": 484 }, { "epoch": 0.1, "learning_rate": 0.0009877806365847011, "loss": 2.3301, "step": 485 }, { "epoch": 0.1, "learning_rate": 0.0009877083095480386, "loss": 2.2422, "step": 486 }, { "epoch": 0.1, "learning_rate": 0.0009876357717525245, "loss": 2.3301, "step": 487 }, { "epoch": 0.1, "learning_rate": 0.0009875630232295048, "loss": 2.2734, "step": 488 }, { "epoch": 0.1, "learning_rate": 0.0009874900640104178, "loss": 2.2754, "step": 489 }, { "epoch": 0.1, "learning_rate": 0.0009874168941267918, "loss": 2.2441, "step": 490 }, { "epoch": 0.1, "learning_rate": 0.0009873435136102467, "loss": 2.293, "step": 491 }, { "epoch": 0.1, "learning_rate": 0.000987269922492493, "loss": 2.3242, "step": 492 }, { "epoch": 0.1, "learning_rate": 0.0009871961208053328, "loss": 2.2188, "step": 493 }, { "epoch": 0.1, "learning_rate": 0.0009871221085806586, "loss": 2.2441, "step": 494 }, { "epoch": 0.1, "learning_rate": 0.0009870478858504544, "loss": 2.3867, "step": 495 }, { "epoch": 0.1, "learning_rate": 0.0009869734526467945, "loss": 2.3672, "step": 496 }, { "epoch": 0.1, "learning_rate": 0.0009868988090018448, "loss": 2.2422, "step": 497 }, { "epoch": 0.1, "learning_rate": 0.0009868239549478618, "loss": 2.2676, "step": 498 }, { "epoch": 0.1, "learning_rate": 0.0009867488905171934, "loss": 2.2363, "step": 499 }, { "epoch": 0.1, "learning_rate": 0.0009866736157422775, "loss": 2.3379, "step": 500 }, { "epoch": 0.1, "learning_rate": 0.0009865981306556436, "loss": 2.2812, "step": 501 }, { "epoch": 0.1, "learning_rate": 0.0009865224352899118, "loss": 2.3438, "step": 502 }, { "epoch": 0.1, "learning_rate": 0.0009864465296777938, "loss": 2.2598, "step": 503 }, { "epoch": 0.1, "learning_rate": 0.0009863704138520907, "loss": 2.3184, "step": 504 }, { "epoch": 0.1, "learning_rate": 0.0009862940878456958, "loss": 2.2637, "step": 505 }, { "epoch": 0.1, "learning_rate": 0.0009862175516915925, "loss": 2.2715, "step": 506 }, { "epoch": 0.1, "learning_rate": 0.0009861408054228555, "loss": 2.3262, "step": 507 }, { "epoch": 0.1, "learning_rate": 0.0009860638490726498, "loss": 2.291, "step": 508 }, { "epoch": 0.1, "learning_rate": 0.0009859866826742313, "loss": 2.2617, "step": 509 }, { "epoch": 0.1, "learning_rate": 0.0009859093062609468, "loss": 2.3125, "step": 510 }, { "epoch": 0.1, "learning_rate": 0.0009858317198662343, "loss": 2.2949, "step": 511 }, { "epoch": 0.1, "learning_rate": 0.0009857539235236215, "loss": 2.3184, "step": 512 }, { "epoch": 0.1, "learning_rate": 0.0009856759172667277, "loss": 2.291, "step": 513 }, { "epoch": 0.1, "learning_rate": 0.0009855977011292625, "loss": 2.2812, "step": 514 }, { "epoch": 0.1, "learning_rate": 0.0009855192751450266, "loss": 2.2148, "step": 515 }, { "epoch": 0.1, "learning_rate": 0.0009854406393479105, "loss": 2.2383, "step": 516 }, { "epoch": 0.1, "learning_rate": 0.0009853617937718967, "loss": 2.2617, "step": 517 }, { "epoch": 0.11, "learning_rate": 0.000985282738451057, "loss": 2.2559, "step": 518 }, { "epoch": 0.11, "learning_rate": 0.0009852034734195547, "loss": 2.2305, "step": 519 }, { "epoch": 0.11, "learning_rate": 0.0009851239987116436, "loss": 2.373, "step": 520 }, { "epoch": 0.11, "learning_rate": 0.000985044314361668, "loss": 2.2734, "step": 521 }, { "epoch": 0.11, "learning_rate": 0.0009849644204040625, "loss": 2.3047, "step": 522 }, { "epoch": 0.11, "learning_rate": 0.000984884316873353, "loss": 2.2539, "step": 523 }, { "epoch": 0.11, "learning_rate": 0.000984804003804155, "loss": 2.3418, "step": 524 }, { "epoch": 0.11, "learning_rate": 0.0009847234812311756, "loss": 2.4023, "step": 525 }, { "epoch": 0.11, "learning_rate": 0.0009846427491892117, "loss": 2.3867, "step": 526 }, { "epoch": 0.11, "learning_rate": 0.0009845618077131508, "loss": 2.2559, "step": 527 }, { "epoch": 0.11, "learning_rate": 0.0009844806568379714, "loss": 2.2637, "step": 528 }, { "epoch": 0.11, "learning_rate": 0.0009843992965987418, "loss": 2.2734, "step": 529 }, { "epoch": 0.11, "learning_rate": 0.0009843177270306212, "loss": 2.2793, "step": 530 }, { "epoch": 0.11, "learning_rate": 0.0009842359481688592, "loss": 2.3066, "step": 531 }, { "epoch": 0.11, "learning_rate": 0.0009841539600487959, "loss": 2.3555, "step": 532 }, { "epoch": 0.11, "learning_rate": 0.0009840717627058616, "loss": 2.2832, "step": 533 }, { "epoch": 0.11, "learning_rate": 0.0009839893561755773, "loss": 2.2979, "step": 534 }, { "epoch": 0.11, "learning_rate": 0.0009839067404935542, "loss": 2.3535, "step": 535 }, { "epoch": 0.11, "learning_rate": 0.0009838239156954938, "loss": 2.1562, "step": 536 }, { "epoch": 0.11, "learning_rate": 0.0009837408818171882, "loss": 2.3086, "step": 537 }, { "epoch": 0.11, "learning_rate": 0.00098365763889452, "loss": 2.2197, "step": 538 }, { "epoch": 0.11, "learning_rate": 0.0009835741869634615, "loss": 2.3125, "step": 539 }, { "epoch": 0.11, "learning_rate": 0.000983490526060076, "loss": 2.2422, "step": 540 }, { "epoch": 0.11, "learning_rate": 0.0009834066562205165, "loss": 2.3359, "step": 541 }, { "epoch": 0.11, "learning_rate": 0.000983322577481027, "loss": 2.248, "step": 542 }, { "epoch": 0.11, "learning_rate": 0.0009832382898779413, "loss": 2.2363, "step": 543 }, { "epoch": 0.11, "learning_rate": 0.0009831537934476835, "loss": 2.3076, "step": 544 }, { "epoch": 0.11, "learning_rate": 0.0009830690882267677, "loss": 2.2695, "step": 545 }, { "epoch": 0.11, "learning_rate": 0.000982984174251799, "loss": 2.2812, "step": 546 }, { "epoch": 0.11, "learning_rate": 0.000982899051559472, "loss": 2.2734, "step": 547 }, { "epoch": 0.11, "learning_rate": 0.0009828137201865715, "loss": 2.2754, "step": 548 }, { "epoch": 0.11, "learning_rate": 0.0009827281801699731, "loss": 2.2949, "step": 549 }, { "epoch": 0.11, "learning_rate": 0.000982642431546642, "loss": 2.2773, "step": 550 }, { "epoch": 0.11, "learning_rate": 0.0009825564743536335, "loss": 2.2793, "step": 551 }, { "epoch": 0.11, "learning_rate": 0.0009824703086280938, "loss": 2.3086, "step": 552 }, { "epoch": 0.11, "learning_rate": 0.000982383934407258, "loss": 2.2168, "step": 553 }, { "epoch": 0.11, "learning_rate": 0.0009822973517284524, "loss": 2.2568, "step": 554 }, { "epoch": 0.11, "learning_rate": 0.0009822105606290927, "loss": 2.2148, "step": 555 }, { "epoch": 0.11, "learning_rate": 0.000982123561146685, "loss": 2.334, "step": 556 }, { "epoch": 0.11, "learning_rate": 0.0009820363533188255, "loss": 2.3047, "step": 557 }, { "epoch": 0.11, "learning_rate": 0.0009819489371832003, "loss": 2.209, "step": 558 }, { "epoch": 0.11, "learning_rate": 0.000981861312777585, "loss": 2.2812, "step": 559 }, { "epoch": 0.11, "learning_rate": 0.0009817734801398463, "loss": 2.2949, "step": 560 }, { "epoch": 0.11, "learning_rate": 0.0009816854393079402, "loss": 2.3555, "step": 561 }, { "epoch": 0.11, "learning_rate": 0.0009815971903199127, "loss": 2.2852, "step": 562 }, { "epoch": 0.11, "learning_rate": 0.0009815087332138996, "loss": 2.3086, "step": 563 }, { "epoch": 0.11, "learning_rate": 0.0009814200680281272, "loss": 2.252, "step": 564 }, { "epoch": 0.11, "learning_rate": 0.0009813311948009109, "loss": 2.2715, "step": 565 }, { "epoch": 0.11, "learning_rate": 0.000981242113570657, "loss": 2.1299, "step": 566 }, { "epoch": 0.12, "learning_rate": 0.000981152824375861, "loss": 2.2148, "step": 567 }, { "epoch": 0.12, "learning_rate": 0.0009810633272551085, "loss": 2.3496, "step": 568 }, { "epoch": 0.12, "learning_rate": 0.0009809736222470748, "loss": 2.2871, "step": 569 }, { "epoch": 0.12, "learning_rate": 0.0009808837093905254, "loss": 2.3086, "step": 570 }, { "epoch": 0.12, "learning_rate": 0.0009807935887243147, "loss": 2.2559, "step": 571 }, { "epoch": 0.12, "learning_rate": 0.000980703260287388, "loss": 2.2461, "step": 572 }, { "epoch": 0.12, "learning_rate": 0.00098061272411878, "loss": 2.2559, "step": 573 }, { "epoch": 0.12, "learning_rate": 0.000980521980257615, "loss": 2.2949, "step": 574 }, { "epoch": 0.12, "learning_rate": 0.000980431028743107, "loss": 2.2637, "step": 575 }, { "epoch": 0.12, "learning_rate": 0.0009803398696145603, "loss": 2.2129, "step": 576 }, { "epoch": 0.12, "learning_rate": 0.0009802485029113681, "loss": 2.3418, "step": 577 }, { "epoch": 0.12, "learning_rate": 0.000980156928673014, "loss": 2.2285, "step": 578 }, { "epoch": 0.12, "learning_rate": 0.0009800651469390705, "loss": 2.291, "step": 579 }, { "epoch": 0.12, "learning_rate": 0.000979973157749201, "loss": 2.3086, "step": 580 }, { "epoch": 0.12, "learning_rate": 0.0009798809611431575, "loss": 2.1855, "step": 581 }, { "epoch": 0.12, "learning_rate": 0.0009797885571607818, "loss": 2.2227, "step": 582 }, { "epoch": 0.12, "learning_rate": 0.0009796959458420057, "loss": 2.3242, "step": 583 }, { "epoch": 0.12, "learning_rate": 0.00097960312722685, "loss": 2.2676, "step": 584 }, { "epoch": 0.12, "learning_rate": 0.0009795101013554258, "loss": 2.3027, "step": 585 }, { "epoch": 0.12, "learning_rate": 0.0009794168682679333, "loss": 2.2676, "step": 586 }, { "epoch": 0.12, "learning_rate": 0.0009793234280046626, "loss": 2.2188, "step": 587 }, { "epoch": 0.12, "learning_rate": 0.0009792297806059927, "loss": 2.2422, "step": 588 }, { "epoch": 0.12, "learning_rate": 0.0009791359261123926, "loss": 2.334, "step": 589 }, { "epoch": 0.12, "learning_rate": 0.0009790418645644209, "loss": 2.2441, "step": 590 }, { "epoch": 0.12, "learning_rate": 0.000978947596002725, "loss": 2.2812, "step": 591 }, { "epoch": 0.12, "learning_rate": 0.0009788531204680429, "loss": 2.2168, "step": 592 }, { "epoch": 0.12, "learning_rate": 0.000978758438001201, "loss": 2.2422, "step": 593 }, { "epoch": 0.12, "learning_rate": 0.0009786635486431152, "loss": 2.2334, "step": 594 }, { "epoch": 0.12, "learning_rate": 0.0009785684524347916, "loss": 2.1758, "step": 595 }, { "epoch": 0.12, "learning_rate": 0.0009784731494173252, "loss": 2.2969, "step": 596 }, { "epoch": 0.12, "learning_rate": 0.0009783776396318999, "loss": 2.2246, "step": 597 }, { "epoch": 0.12, "learning_rate": 0.0009782819231197897, "loss": 2.2891, "step": 598 }, { "epoch": 0.12, "learning_rate": 0.0009781859999223577, "loss": 2.3086, "step": 599 }, { "epoch": 0.12, "learning_rate": 0.0009780898700810562, "loss": 2.3086, "step": 600 }, { "epoch": 0.12, "learning_rate": 0.0009779935336374267, "loss": 2.2402, "step": 601 }, { "epoch": 0.12, "learning_rate": 0.0009778969906331005, "loss": 2.2617, "step": 602 }, { "epoch": 0.12, "learning_rate": 0.0009778002411097975, "loss": 2.2559, "step": 603 }, { "epoch": 0.12, "learning_rate": 0.0009777032851093273, "loss": 2.2871, "step": 604 }, { "epoch": 0.12, "learning_rate": 0.0009776061226735883, "loss": 2.2217, "step": 605 }, { "epoch": 0.12, "learning_rate": 0.000977508753844569, "loss": 2.2969, "step": 606 }, { "epoch": 0.12, "learning_rate": 0.000977411178664346, "loss": 2.3379, "step": 607 }, { "epoch": 0.12, "learning_rate": 0.0009773133971750856, "loss": 2.4512, "step": 608 }, { "epoch": 0.12, "learning_rate": 0.0009772154094190434, "loss": 2.2822, "step": 609 }, { "epoch": 0.12, "learning_rate": 0.0009771172154385637, "loss": 2.3047, "step": 610 }, { "epoch": 0.12, "learning_rate": 0.0009770188152760804, "loss": 2.2461, "step": 611 }, { "epoch": 0.12, "learning_rate": 0.000976920208974116, "loss": 2.1621, "step": 612 }, { "epoch": 0.12, "learning_rate": 0.0009768213965752827, "loss": 2.3457, "step": 613 }, { "epoch": 0.12, "learning_rate": 0.000976722378122281, "loss": 2.2871, "step": 614 }, { "epoch": 0.12, "learning_rate": 0.0009766231536579013, "loss": 2.2617, "step": 615 }, { "epoch": 0.13, "learning_rate": 0.0009765237232250222, "loss": 2.2559, "step": 616 }, { "epoch": 0.13, "learning_rate": 0.0009764240868666119, "loss": 2.2871, "step": 617 }, { "epoch": 0.13, "learning_rate": 0.0009763242446257272, "loss": 2.2109, "step": 618 }, { "epoch": 0.13, "learning_rate": 0.0009762241965455141, "loss": 2.3438, "step": 619 }, { "epoch": 0.13, "learning_rate": 0.0009761239426692076, "loss": 2.2676, "step": 620 }, { "epoch": 0.13, "learning_rate": 0.0009760234830401316, "loss": 2.2109, "step": 621 }, { "epoch": 0.13, "learning_rate": 0.0009759228177016985, "loss": 2.3457, "step": 622 }, { "epoch": 0.13, "learning_rate": 0.0009758219466974104, "loss": 2.2461, "step": 623 }, { "epoch": 0.13, "learning_rate": 0.0009757208700708575, "loss": 2.2891, "step": 624 }, { "epoch": 0.13, "learning_rate": 0.0009756195878657192, "loss": 2.2441, "step": 625 }, { "epoch": 0.13, "learning_rate": 0.0009755181001257637, "loss": 2.293, "step": 626 }, { "epoch": 0.13, "learning_rate": 0.0009754164068948481, "loss": 2.25, "step": 627 }, { "epoch": 0.13, "learning_rate": 0.0009753145082169183, "loss": 2.3086, "step": 628 }, { "epoch": 0.13, "learning_rate": 0.0009752124041360089, "loss": 2.2227, "step": 629 }, { "epoch": 0.13, "learning_rate": 0.0009751100946962432, "loss": 2.1973, "step": 630 }, { "epoch": 0.13, "learning_rate": 0.0009750075799418332, "loss": 2.2461, "step": 631 }, { "epoch": 0.13, "learning_rate": 0.00097490485991708, "loss": 2.1953, "step": 632 }, { "epoch": 0.13, "learning_rate": 0.0009748019346663731, "loss": 2.1914, "step": 633 }, { "epoch": 0.13, "learning_rate": 0.0009746988042341906, "loss": 2.2656, "step": 634 }, { "epoch": 0.13, "learning_rate": 0.0009745954686650997, "loss": 2.2617, "step": 635 }, { "epoch": 0.13, "learning_rate": 0.0009744919280037555, "loss": 2.2109, "step": 636 }, { "epoch": 0.13, "learning_rate": 0.0009743881822949026, "loss": 2.2812, "step": 637 }, { "epoch": 0.13, "learning_rate": 0.0009742842315833735, "loss": 2.2646, "step": 638 }, { "epoch": 0.13, "learning_rate": 0.0009741800759140898, "loss": 2.25, "step": 639 }, { "epoch": 0.13, "learning_rate": 0.0009740757153320614, "loss": 2.2559, "step": 640 }, { "epoch": 0.13, "learning_rate": 0.0009739711498823868, "loss": 2.2266, "step": 641 }, { "epoch": 0.13, "learning_rate": 0.0009738663796102529, "loss": 2.2158, "step": 642 }, { "epoch": 0.13, "learning_rate": 0.0009737614045609355, "loss": 2.1758, "step": 643 }, { "epoch": 0.13, "learning_rate": 0.0009736562247797984, "loss": 2.2793, "step": 644 }, { "epoch": 0.13, "learning_rate": 0.0009735508403122944, "loss": 2.2402, "step": 645 }, { "epoch": 0.13, "learning_rate": 0.0009734452512039642, "loss": 2.2031, "step": 646 }, { "epoch": 0.13, "learning_rate": 0.0009733394575004373, "loss": 2.2441, "step": 647 }, { "epoch": 0.13, "learning_rate": 0.0009732334592474314, "loss": 2.2637, "step": 648 }, { "epoch": 0.13, "learning_rate": 0.0009731272564907532, "loss": 2.2383, "step": 649 }, { "epoch": 0.13, "learning_rate": 0.0009730208492762966, "loss": 2.293, "step": 650 }, { "epoch": 0.13, "learning_rate": 0.0009729142376500449, "loss": 2.25, "step": 651 }, { "epoch": 0.13, "learning_rate": 0.0009728074216580694, "loss": 2.293, "step": 652 }, { "epoch": 0.13, "learning_rate": 0.0009727004013465296, "loss": 2.1875, "step": 653 }, { "epoch": 0.13, "learning_rate": 0.0009725931767616733, "loss": 2.291, "step": 654 }, { "epoch": 0.13, "learning_rate": 0.000972485747949837, "loss": 2.1426, "step": 655 }, { "epoch": 0.13, "learning_rate": 0.0009723781149574449, "loss": 2.3496, "step": 656 }, { "epoch": 0.13, "learning_rate": 0.0009722702778310095, "loss": 2.2207, "step": 657 }, { "epoch": 0.13, "learning_rate": 0.0009721622366171318, "loss": 2.3145, "step": 658 }, { "epoch": 0.13, "learning_rate": 0.0009720539913625008, "loss": 2.2734, "step": 659 }, { "epoch": 0.13, "learning_rate": 0.0009719455421138938, "loss": 2.2441, "step": 660 }, { "epoch": 0.13, "learning_rate": 0.0009718368889181764, "loss": 2.2598, "step": 661 }, { "epoch": 0.13, "learning_rate": 0.0009717280318223018, "loss": 2.291, "step": 662 }, { "epoch": 0.13, "learning_rate": 0.0009716189708733116, "loss": 2.2031, "step": 663 }, { "epoch": 0.13, "learning_rate": 0.0009715097061183357, "loss": 2.2598, "step": 664 }, { "epoch": 0.13, "learning_rate": 0.000971400237604592, "loss": 2.2344, "step": 665 }, { "epoch": 0.14, "learning_rate": 0.0009712905653793862, "loss": 2.2949, "step": 666 }, { "epoch": 0.14, "learning_rate": 0.0009711806894901123, "loss": 2.1875, "step": 667 }, { "epoch": 0.14, "learning_rate": 0.0009710706099842519, "loss": 2.2285, "step": 668 }, { "epoch": 0.14, "learning_rate": 0.0009709603269093752, "loss": 2.2285, "step": 669 }, { "epoch": 0.14, "learning_rate": 0.00097084984031314, "loss": 2.2266, "step": 670 }, { "epoch": 0.14, "learning_rate": 0.0009707391502432921, "loss": 2.2559, "step": 671 }, { "epoch": 0.14, "learning_rate": 0.0009706282567476651, "loss": 2.2695, "step": 672 }, { "epoch": 0.14, "learning_rate": 0.0009705171598741808, "loss": 2.1973, "step": 673 }, { "epoch": 0.14, "learning_rate": 0.0009704058596708488, "loss": 2.2383, "step": 674 }, { "epoch": 0.14, "learning_rate": 0.0009702943561857661, "loss": 2.2236, "step": 675 }, { "epoch": 0.14, "learning_rate": 0.0009701826494671184, "loss": 2.2344, "step": 676 }, { "epoch": 0.14, "learning_rate": 0.0009700707395631787, "loss": 2.1572, "step": 677 }, { "epoch": 0.14, "learning_rate": 0.0009699586265223077, "loss": 2.2129, "step": 678 }, { "epoch": 0.14, "learning_rate": 0.0009698463103929542, "loss": 2.2402, "step": 679 }, { "epoch": 0.14, "learning_rate": 0.0009697337912236546, "loss": 2.2461, "step": 680 }, { "epoch": 0.14, "learning_rate": 0.000969621069063033, "loss": 2.2637, "step": 681 }, { "epoch": 0.14, "learning_rate": 0.0009695081439598014, "loss": 2.2227, "step": 682 }, { "epoch": 0.14, "learning_rate": 0.0009693950159627594, "loss": 2.2559, "step": 683 }, { "epoch": 0.14, "learning_rate": 0.0009692816851207941, "loss": 2.3496, "step": 684 }, { "epoch": 0.14, "learning_rate": 0.0009691681514828806, "loss": 2.2051, "step": 685 }, { "epoch": 0.14, "learning_rate": 0.0009690544150980814, "loss": 2.2695, "step": 686 }, { "epoch": 0.14, "learning_rate": 0.0009689404760155465, "loss": 2.2773, "step": 687 }, { "epoch": 0.14, "learning_rate": 0.0009688263342845139, "loss": 2.2285, "step": 688 }, { "epoch": 0.14, "learning_rate": 0.000968711989954309, "loss": 2.1621, "step": 689 }, { "epoch": 0.14, "learning_rate": 0.0009685974430743445, "loss": 2.3047, "step": 690 }, { "epoch": 0.14, "learning_rate": 0.0009684826936941209, "loss": 2.2119, "step": 691 }, { "epoch": 0.14, "learning_rate": 0.0009683677418632262, "loss": 2.1865, "step": 692 }, { "epoch": 0.14, "learning_rate": 0.0009682525876313357, "loss": 2.1309, "step": 693 }, { "epoch": 0.14, "learning_rate": 0.0009681372310482123, "loss": 2.2227, "step": 694 }, { "epoch": 0.14, "learning_rate": 0.0009680216721637065, "loss": 2.2344, "step": 695 }, { "epoch": 0.14, "learning_rate": 0.000967905911027756, "loss": 2.1914, "step": 696 }, { "epoch": 0.14, "learning_rate": 0.0009677899476903856, "loss": 2.2734, "step": 697 }, { "epoch": 0.14, "learning_rate": 0.0009676737822017083, "loss": 2.252, "step": 698 }, { "epoch": 0.14, "learning_rate": 0.0009675574146119239, "loss": 2.3047, "step": 699 }, { "epoch": 0.14, "learning_rate": 0.0009674408449713193, "loss": 2.25, "step": 700 }, { "epoch": 0.14, "learning_rate": 0.0009673240733302695, "loss": 2.2344, "step": 701 }, { "epoch": 0.14, "learning_rate": 0.0009672070997392361, "loss": 2.2412, "step": 702 }, { "epoch": 0.14, "learning_rate": 0.0009670899242487682, "loss": 2.2275, "step": 703 }, { "epoch": 0.14, "learning_rate": 0.0009669725469095021, "loss": 2.2051, "step": 704 }, { "epoch": 0.14, "learning_rate": 0.0009668549677721616, "loss": 2.2598, "step": 705 }, { "epoch": 0.14, "learning_rate": 0.0009667371868875574, "loss": 2.2793, "step": 706 }, { "epoch": 0.14, "learning_rate": 0.0009666192043065873, "loss": 2.1738, "step": 707 }, { "epoch": 0.14, "learning_rate": 0.0009665010200802367, "loss": 2.2734, "step": 708 }, { "epoch": 0.14, "learning_rate": 0.0009663826342595776, "loss": 2.2305, "step": 709 }, { "epoch": 0.14, "learning_rate": 0.0009662640468957696, "loss": 2.2568, "step": 710 }, { "epoch": 0.14, "learning_rate": 0.0009661452580400591, "loss": 2.3086, "step": 711 }, { "epoch": 0.14, "learning_rate": 0.0009660262677437798, "loss": 2.2109, "step": 712 }, { "epoch": 0.14, "learning_rate": 0.000965907076058352, "loss": 2.2598, "step": 713 }, { "epoch": 0.14, "learning_rate": 0.0009657876830352837, "loss": 2.2598, "step": 714 }, { "epoch": 0.15, "learning_rate": 0.0009656680887261692, "loss": 2.1895, "step": 715 }, { "epoch": 0.15, "learning_rate": 0.0009655482931826905, "loss": 2.2812, "step": 716 }, { "epoch": 0.15, "learning_rate": 0.0009654282964566158, "loss": 2.2539, "step": 717 }, { "epoch": 0.15, "learning_rate": 0.0009653080985998009, "loss": 2.2432, "step": 718 }, { "epoch": 0.15, "learning_rate": 0.000965187699664188, "loss": 2.1641, "step": 719 }, { "epoch": 0.15, "learning_rate": 0.0009650670997018067, "loss": 2.2188, "step": 720 }, { "epoch": 0.15, "learning_rate": 0.0009649462987647732, "loss": 2.2598, "step": 721 }, { "epoch": 0.15, "learning_rate": 0.0009648252969052904, "loss": 2.1973, "step": 722 }, { "epoch": 0.15, "learning_rate": 0.0009647040941756482, "loss": 2.1973, "step": 723 }, { "epoch": 0.15, "learning_rate": 0.0009645826906282233, "loss": 2.1875, "step": 724 }, { "epoch": 0.15, "learning_rate": 0.0009644610863154795, "loss": 2.252, "step": 725 }, { "epoch": 0.15, "learning_rate": 0.0009643392812899665, "loss": 2.2188, "step": 726 }, { "epoch": 0.15, "learning_rate": 0.0009642172756043216, "loss": 2.2051, "step": 727 }, { "epoch": 0.15, "learning_rate": 0.0009640950693112684, "loss": 2.2051, "step": 728 }, { "epoch": 0.15, "learning_rate": 0.0009639726624636174, "loss": 2.2617, "step": 729 }, { "epoch": 0.15, "learning_rate": 0.0009638500551142653, "loss": 2.2188, "step": 730 }, { "epoch": 0.15, "learning_rate": 0.0009637272473161962, "loss": 2.2832, "step": 731 }, { "epoch": 0.15, "learning_rate": 0.0009636042391224802, "loss": 2.2383, "step": 732 }, { "epoch": 0.15, "learning_rate": 0.000963481030586274, "loss": 2.1934, "step": 733 }, { "epoch": 0.15, "learning_rate": 0.0009633576217608215, "loss": 2.2402, "step": 734 }, { "epoch": 0.15, "learning_rate": 0.0009632340126994526, "loss": 2.1523, "step": 735 }, { "epoch": 0.15, "learning_rate": 0.0009631102034555839, "loss": 2.252, "step": 736 }, { "epoch": 0.15, "learning_rate": 0.0009629861940827182, "loss": 2.209, "step": 737 }, { "epoch": 0.15, "learning_rate": 0.0009628619846344453, "loss": 2.2188, "step": 738 }, { "epoch": 0.15, "learning_rate": 0.0009627375751644411, "loss": 2.1816, "step": 739 }, { "epoch": 0.15, "learning_rate": 0.0009626129657264681, "loss": 2.2285, "step": 740 }, { "epoch": 0.15, "learning_rate": 0.0009624881563743754, "loss": 2.1992, "step": 741 }, { "epoch": 0.15, "learning_rate": 0.000962363147162098, "loss": 2.1836, "step": 742 }, { "epoch": 0.15, "learning_rate": 0.0009622379381436575, "loss": 2.2285, "step": 743 }, { "epoch": 0.15, "learning_rate": 0.0009621125293731619, "loss": 2.2471, "step": 744 }, { "epoch": 0.15, "learning_rate": 0.0009619869209048058, "loss": 2.2188, "step": 745 }, { "epoch": 0.15, "learning_rate": 0.0009618611127928694, "loss": 2.1914, "step": 746 }, { "epoch": 0.15, "learning_rate": 0.0009617351050917195, "loss": 2.1895, "step": 747 }, { "epoch": 0.15, "learning_rate": 0.0009616088978558099, "loss": 2.21, "step": 748 }, { "epoch": 0.15, "learning_rate": 0.0009614824911396792, "loss": 2.2031, "step": 749 }, { "epoch": 0.15, "learning_rate": 0.0009613558849979534, "loss": 2.2031, "step": 750 }, { "epoch": 0.15, "learning_rate": 0.0009612290794853438, "loss": 2.1602, "step": 751 }, { "epoch": 0.15, "learning_rate": 0.0009611020746566487, "loss": 2.2656, "step": 752 }, { "epoch": 0.15, "learning_rate": 0.000960974870566752, "loss": 2.2559, "step": 753 }, { "epoch": 0.15, "learning_rate": 0.0009608474672706239, "loss": 2.2559, "step": 754 }, { "epoch": 0.15, "learning_rate": 0.0009607198648233204, "loss": 2.2148, "step": 755 }, { "epoch": 0.15, "learning_rate": 0.0009605920632799838, "loss": 2.252, "step": 756 }, { "epoch": 0.15, "learning_rate": 0.0009604640626958427, "loss": 2.2012, "step": 757 }, { "epoch": 0.15, "learning_rate": 0.0009603358631262111, "loss": 2.2812, "step": 758 }, { "epoch": 0.15, "learning_rate": 0.0009602074646264893, "loss": 2.209, "step": 759 }, { "epoch": 0.15, "learning_rate": 0.000960078867252164, "loss": 2.1963, "step": 760 }, { "epoch": 0.15, "learning_rate": 0.000959950071058807, "loss": 2.2539, "step": 761 }, { "epoch": 0.15, "learning_rate": 0.0009598210761020765, "loss": 2.1914, "step": 762 }, { "epoch": 0.15, "learning_rate": 0.0009596918824377168, "loss": 2.1973, "step": 763 }, { "epoch": 0.16, "learning_rate": 0.0009595624901215573, "loss": 2.1689, "step": 764 }, { "epoch": 0.16, "learning_rate": 0.0009594328992095144, "loss": 2.2051, "step": 765 }, { "epoch": 0.16, "learning_rate": 0.0009593031097575893, "loss": 2.2188, "step": 766 }, { "epoch": 0.16, "learning_rate": 0.0009591731218218693, "loss": 2.2461, "step": 767 }, { "epoch": 0.16, "learning_rate": 0.0009590429354585277, "loss": 2.2324, "step": 768 }, { "epoch": 0.16, "learning_rate": 0.0009589125507238233, "loss": 2.1416, "step": 769 }, { "epoch": 0.16, "learning_rate": 0.0009587819676741008, "loss": 2.248, "step": 770 }, { "epoch": 0.16, "learning_rate": 0.0009586511863657906, "loss": 2.2207, "step": 771 }, { "epoch": 0.16, "learning_rate": 0.0009585202068554083, "loss": 2.2637, "step": 772 }, { "epoch": 0.16, "learning_rate": 0.0009583890291995561, "loss": 2.1816, "step": 773 }, { "epoch": 0.16, "learning_rate": 0.0009582576534549208, "loss": 2.166, "step": 774 }, { "epoch": 0.16, "learning_rate": 0.0009581260796782754, "loss": 2.209, "step": 775 }, { "epoch": 0.16, "learning_rate": 0.0009579943079264784, "loss": 2.3008, "step": 776 }, { "epoch": 0.16, "learning_rate": 0.0009578623382564739, "loss": 2.2227, "step": 777 }, { "epoch": 0.16, "learning_rate": 0.0009577301707252913, "loss": 2.2041, "step": 778 }, { "epoch": 0.16, "learning_rate": 0.0009575978053900456, "loss": 2.2227, "step": 779 }, { "epoch": 0.16, "learning_rate": 0.0009574652423079375, "loss": 2.2207, "step": 780 }, { "epoch": 0.16, "learning_rate": 0.0009573324815362528, "loss": 2.2344, "step": 781 }, { "epoch": 0.16, "learning_rate": 0.0009571995231323629, "loss": 2.2363, "step": 782 }, { "epoch": 0.16, "learning_rate": 0.0009570663671537248, "loss": 2.1504, "step": 783 }, { "epoch": 0.16, "learning_rate": 0.0009569330136578803, "loss": 2.1621, "step": 784 }, { "epoch": 0.16, "learning_rate": 0.0009567994627024572, "loss": 2.1895, "step": 785 }, { "epoch": 0.16, "learning_rate": 0.0009566657143451685, "loss": 2.2734, "step": 786 }, { "epoch": 0.16, "learning_rate": 0.0009565317686438122, "loss": 2.2773, "step": 787 }, { "epoch": 0.16, "learning_rate": 0.0009563976256562717, "loss": 2.248, "step": 788 }, { "epoch": 0.16, "learning_rate": 0.0009562632854405158, "loss": 2.1582, "step": 789 }, { "epoch": 0.16, "learning_rate": 0.0009561287480545984, "loss": 2.2344, "step": 790 }, { "epoch": 0.16, "learning_rate": 0.0009559940135566587, "loss": 2.2012, "step": 791 }, { "epoch": 0.16, "learning_rate": 0.000955859082004921, "loss": 2.25, "step": 792 }, { "epoch": 0.16, "learning_rate": 0.0009557239534576948, "loss": 2.1953, "step": 793 }, { "epoch": 0.16, "learning_rate": 0.0009555886279733746, "loss": 2.2383, "step": 794 }, { "epoch": 0.16, "learning_rate": 0.0009554531056104403, "loss": 2.2656, "step": 795 }, { "epoch": 0.16, "learning_rate": 0.0009553173864274567, "loss": 2.2832, "step": 796 }, { "epoch": 0.16, "learning_rate": 0.0009551814704830733, "loss": 2.2441, "step": 797 }, { "epoch": 0.16, "learning_rate": 0.0009550453578360254, "loss": 2.25, "step": 798 }, { "epoch": 0.16, "learning_rate": 0.0009549090485451327, "loss": 2.1836, "step": 799 }, { "epoch": 0.16, "learning_rate": 0.0009547725426693001, "loss": 2.1367, "step": 800 }, { "epoch": 0.16, "learning_rate": 0.0009546358402675173, "loss": 2.2246, "step": 801 }, { "epoch": 0.16, "learning_rate": 0.0009544989413988592, "loss": 2.2109, "step": 802 }, { "epoch": 0.16, "learning_rate": 0.0009543618461224854, "loss": 2.2871, "step": 803 }, { "epoch": 0.16, "learning_rate": 0.0009542245544976402, "loss": 2.3027, "step": 804 }, { "epoch": 0.16, "learning_rate": 0.0009540870665836534, "loss": 2.1797, "step": 805 }, { "epoch": 0.16, "learning_rate": 0.0009539493824399388, "loss": 2.2188, "step": 806 }, { "epoch": 0.16, "learning_rate": 0.0009538115021259956, "loss": 2.2773, "step": 807 }, { "epoch": 0.16, "learning_rate": 0.0009536734257014074, "loss": 2.1523, "step": 808 }, { "epoch": 0.16, "learning_rate": 0.000953535153225843, "loss": 2.2461, "step": 809 }, { "epoch": 0.16, "learning_rate": 0.0009533966847590551, "loss": 2.2188, "step": 810 }, { "epoch": 0.16, "learning_rate": 0.0009532580203608823, "loss": 2.251, "step": 811 }, { "epoch": 0.16, "learning_rate": 0.0009531191600912468, "loss": 2.1895, "step": 812 }, { "epoch": 0.17, "learning_rate": 0.0009529801040101558, "loss": 2.1592, "step": 813 }, { "epoch": 0.17, "learning_rate": 0.0009528408521777012, "loss": 2.252, "step": 814 }, { "epoch": 0.17, "learning_rate": 0.0009527014046540597, "loss": 2.1885, "step": 815 }, { "epoch": 0.17, "learning_rate": 0.0009525617614994921, "loss": 2.2637, "step": 816 }, { "epoch": 0.17, "learning_rate": 0.000952421922774344, "loss": 2.1494, "step": 817 }, { "epoch": 0.17, "learning_rate": 0.0009522818885390455, "loss": 2.1963, "step": 818 }, { "epoch": 0.17, "learning_rate": 0.0009521416588541111, "loss": 2.1738, "step": 819 }, { "epoch": 0.17, "learning_rate": 0.00095200123378014, "loss": 2.1377, "step": 820 }, { "epoch": 0.17, "learning_rate": 0.0009518606133778153, "loss": 2.2285, "step": 821 }, { "epoch": 0.17, "learning_rate": 0.0009517197977079052, "loss": 2.25, "step": 822 }, { "epoch": 0.17, "learning_rate": 0.0009515787868312618, "loss": 2.1855, "step": 823 }, { "epoch": 0.17, "learning_rate": 0.0009514375808088218, "loss": 2.1514, "step": 824 }, { "epoch": 0.17, "learning_rate": 0.0009512961797016062, "loss": 2.1758, "step": 825 }, { "epoch": 0.17, "learning_rate": 0.0009511545835707199, "loss": 2.2148, "step": 826 }, { "epoch": 0.17, "learning_rate": 0.0009510127924773528, "loss": 2.291, "step": 827 }, { "epoch": 0.17, "learning_rate": 0.0009508708064827785, "loss": 2.2441, "step": 828 }, { "epoch": 0.17, "learning_rate": 0.000950728625648355, "loss": 2.2422, "step": 829 }, { "epoch": 0.17, "learning_rate": 0.0009505862500355246, "loss": 2.1992, "step": 830 }, { "epoch": 0.17, "learning_rate": 0.0009504436797058136, "loss": 2.25, "step": 831 }, { "epoch": 0.17, "learning_rate": 0.0009503009147208323, "loss": 2.2188, "step": 832 }, { "epoch": 0.17, "learning_rate": 0.0009501579551422759, "loss": 2.1797, "step": 833 }, { "epoch": 0.17, "learning_rate": 0.0009500148010319225, "loss": 2.1875, "step": 834 }, { "epoch": 0.17, "learning_rate": 0.0009498714524516352, "loss": 2.127, "step": 835 }, { "epoch": 0.17, "learning_rate": 0.0009497279094633607, "loss": 2.1738, "step": 836 }, { "epoch": 0.17, "learning_rate": 0.0009495841721291299, "loss": 2.165, "step": 837 }, { "epoch": 0.17, "learning_rate": 0.0009494402405110579, "loss": 2.2012, "step": 838 }, { "epoch": 0.17, "learning_rate": 0.0009492961146713428, "loss": 2.1318, "step": 839 }, { "epoch": 0.17, "learning_rate": 0.0009491517946722681, "loss": 2.2188, "step": 840 }, { "epoch": 0.17, "learning_rate": 0.0009490072805761998, "loss": 2.2578, "step": 841 }, { "epoch": 0.17, "learning_rate": 0.0009488625724455887, "loss": 2.2285, "step": 842 }, { "epoch": 0.17, "learning_rate": 0.0009487176703429691, "loss": 2.2402, "step": 843 }, { "epoch": 0.17, "learning_rate": 0.0009485725743309593, "loss": 2.1641, "step": 844 }, { "epoch": 0.17, "learning_rate": 0.0009484272844722609, "loss": 2.2383, "step": 845 }, { "epoch": 0.17, "learning_rate": 0.0009482818008296599, "loss": 2.2188, "step": 846 }, { "epoch": 0.17, "learning_rate": 0.0009481361234660257, "loss": 2.1855, "step": 847 }, { "epoch": 0.17, "learning_rate": 0.0009479902524443116, "loss": 2.2148, "step": 848 }, { "epoch": 0.17, "learning_rate": 0.0009478441878275543, "loss": 2.1953, "step": 849 }, { "epoch": 0.17, "learning_rate": 0.0009476979296788746, "loss": 2.2148, "step": 850 }, { "epoch": 0.17, "learning_rate": 0.0009475514780614764, "loss": 2.209, "step": 851 }, { "epoch": 0.17, "learning_rate": 0.0009474048330386475, "loss": 2.1836, "step": 852 }, { "epoch": 0.17, "learning_rate": 0.0009472579946737594, "loss": 2.1982, "step": 853 }, { "epoch": 0.17, "learning_rate": 0.000947110963030267, "loss": 2.2266, "step": 854 }, { "epoch": 0.17, "learning_rate": 0.0009469637381717085, "loss": 2.1816, "step": 855 }, { "epoch": 0.17, "learning_rate": 0.0009468163201617061, "loss": 2.1855, "step": 856 }, { "epoch": 0.17, "learning_rate": 0.0009466687090639652, "loss": 2.1699, "step": 857 }, { "epoch": 0.17, "learning_rate": 0.0009465209049422744, "loss": 2.2246, "step": 858 }, { "epoch": 0.17, "learning_rate": 0.0009463729078605061, "loss": 2.1914, "step": 859 }, { "epoch": 0.17, "learning_rate": 0.0009462247178826158, "loss": 2.2285, "step": 860 }, { "epoch": 0.17, "learning_rate": 0.0009460763350726428, "loss": 2.1738, "step": 861 }, { "epoch": 0.17, "learning_rate": 0.0009459277594947091, "loss": 2.1738, "step": 862 }, { "epoch": 0.18, "learning_rate": 0.0009457789912130206, "loss": 2.1689, "step": 863 }, { "epoch": 0.18, "learning_rate": 0.0009456300302918658, "loss": 2.125, "step": 864 }, { "epoch": 0.18, "learning_rate": 0.0009454808767956173, "loss": 2.3477, "step": 865 }, { "epoch": 0.18, "learning_rate": 0.0009453315307887303, "loss": 2.2207, "step": 866 }, { "epoch": 0.18, "learning_rate": 0.0009451819923357434, "loss": 2.2012, "step": 867 }, { "epoch": 0.18, "learning_rate": 0.0009450322615012782, "loss": 2.1836, "step": 868 }, { "epoch": 0.18, "learning_rate": 0.0009448823383500396, "loss": 2.25, "step": 869 }, { "epoch": 0.18, "learning_rate": 0.0009447322229468156, "loss": 2.2334, "step": 870 }, { "epoch": 0.18, "learning_rate": 0.0009445819153564774, "loss": 2.2578, "step": 871 }, { "epoch": 0.18, "learning_rate": 0.0009444314156439787, "loss": 2.1797, "step": 872 }, { "epoch": 0.18, "learning_rate": 0.000944280723874357, "loss": 2.2568, "step": 873 }, { "epoch": 0.18, "learning_rate": 0.0009441298401127322, "loss": 2.1836, "step": 874 }, { "epoch": 0.18, "learning_rate": 0.0009439787644243078, "loss": 2.1211, "step": 875 }, { "epoch": 0.18, "learning_rate": 0.0009438274968743692, "loss": 2.1543, "step": 876 }, { "epoch": 0.18, "learning_rate": 0.0009436760375282858, "loss": 2.207, "step": 877 }, { "epoch": 0.18, "learning_rate": 0.0009435243864515093, "loss": 2.291, "step": 878 }, { "epoch": 0.18, "learning_rate": 0.0009433725437095743, "loss": 2.2168, "step": 879 }, { "epoch": 0.18, "learning_rate": 0.0009432205093680983, "loss": 2.1973, "step": 880 }, { "epoch": 0.18, "learning_rate": 0.0009430682834927817, "loss": 2.252, "step": 881 }, { "epoch": 0.18, "learning_rate": 0.0009429158661494077, "loss": 2.2354, "step": 882 }, { "epoch": 0.18, "learning_rate": 0.0009427632574038418, "loss": 2.2871, "step": 883 }, { "epoch": 0.18, "learning_rate": 0.0009426104573220326, "loss": 2.3105, "step": 884 }, { "epoch": 0.18, "learning_rate": 0.0009424574659700116, "loss": 2.1621, "step": 885 }, { "epoch": 0.18, "learning_rate": 0.0009423042834138924, "loss": 2.207, "step": 886 }, { "epoch": 0.18, "learning_rate": 0.0009421509097198715, "loss": 2.1641, "step": 887 }, { "epoch": 0.18, "learning_rate": 0.0009419973449542279, "loss": 2.2432, "step": 888 }, { "epoch": 0.18, "learning_rate": 0.0009418435891833236, "loss": 2.1973, "step": 889 }, { "epoch": 0.18, "learning_rate": 0.0009416896424736025, "loss": 2.1777, "step": 890 }, { "epoch": 0.18, "learning_rate": 0.0009415355048915915, "loss": 2.1133, "step": 891 }, { "epoch": 0.18, "learning_rate": 0.0009413811765038995, "loss": 2.1709, "step": 892 }, { "epoch": 0.18, "learning_rate": 0.0009412266573772186, "loss": 2.1777, "step": 893 }, { "epoch": 0.18, "learning_rate": 0.0009410719475783225, "loss": 2.2539, "step": 894 }, { "epoch": 0.18, "learning_rate": 0.000940917047174068, "loss": 2.1777, "step": 895 }, { "epoch": 0.18, "learning_rate": 0.0009407619562313936, "loss": 2.3145, "step": 896 }, { "epoch": 0.18, "learning_rate": 0.0009406066748173208, "loss": 2.2891, "step": 897 }, { "epoch": 0.18, "learning_rate": 0.0009404512029989527, "loss": 2.1816, "step": 898 }, { "epoch": 0.18, "learning_rate": 0.0009402955408434755, "loss": 2.1484, "step": 899 }, { "epoch": 0.18, "learning_rate": 0.000940139688418157, "loss": 2.1543, "step": 900 }, { "epoch": 0.18, "learning_rate": 0.0009399836457903477, "loss": 2.1865, "step": 901 }, { "epoch": 0.18, "learning_rate": 0.0009398274130274798, "loss": 2.2188, "step": 902 }, { "epoch": 0.18, "learning_rate": 0.0009396709901970678, "loss": 2.2539, "step": 903 }, { "epoch": 0.18, "learning_rate": 0.0009395143773667088, "loss": 2.1553, "step": 904 }, { "epoch": 0.18, "learning_rate": 0.0009393575746040813, "loss": 2.2285, "step": 905 }, { "epoch": 0.18, "learning_rate": 0.0009392005819769465, "loss": 2.2891, "step": 906 }, { "epoch": 0.18, "learning_rate": 0.0009390433995531474, "loss": 2.1309, "step": 907 }, { "epoch": 0.18, "learning_rate": 0.0009388860274006087, "loss": 2.2754, "step": 908 }, { "epoch": 0.18, "learning_rate": 0.0009387284655873376, "loss": 2.2168, "step": 909 }, { "epoch": 0.18, "learning_rate": 0.0009385707141814231, "loss": 2.2051, "step": 910 }, { "epoch": 0.18, "learning_rate": 0.0009384127732510361, "loss": 2.2578, "step": 911 }, { "epoch": 0.19, "learning_rate": 0.0009382546428644291, "loss": 2.1953, "step": 912 }, { "epoch": 0.19, "learning_rate": 0.0009380963230899371, "loss": 2.1865, "step": 913 }, { "epoch": 0.19, "learning_rate": 0.0009379378139959764, "loss": 2.291, "step": 914 }, { "epoch": 0.19, "learning_rate": 0.0009377791156510454, "loss": 2.2109, "step": 915 }, { "epoch": 0.19, "learning_rate": 0.0009376202281237241, "loss": 2.1504, "step": 916 }, { "epoch": 0.19, "learning_rate": 0.0009374611514826745, "loss": 2.25, "step": 917 }, { "epoch": 0.19, "learning_rate": 0.00093730188579664, "loss": 2.2422, "step": 918 }, { "epoch": 0.19, "learning_rate": 0.0009371424311344458, "loss": 2.1279, "step": 919 }, { "epoch": 0.19, "learning_rate": 0.0009369827875649992, "loss": 2.1914, "step": 920 }, { "epoch": 0.19, "learning_rate": 0.0009368229551572884, "loss": 2.2246, "step": 921 }, { "epoch": 0.19, "learning_rate": 0.0009366629339803836, "loss": 2.1406, "step": 922 }, { "epoch": 0.19, "learning_rate": 0.0009365027241034367, "loss": 2.1914, "step": 923 }, { "epoch": 0.19, "learning_rate": 0.0009363423255956809, "loss": 2.2051, "step": 924 }, { "epoch": 0.19, "learning_rate": 0.0009361817385264311, "loss": 2.2891, "step": 925 }, { "epoch": 0.19, "learning_rate": 0.0009360209629650834, "loss": 2.2119, "step": 926 }, { "epoch": 0.19, "learning_rate": 0.0009358599989811158, "loss": 2.1367, "step": 927 }, { "epoch": 0.19, "learning_rate": 0.0009356988466440872, "loss": 2.2256, "step": 928 }, { "epoch": 0.19, "learning_rate": 0.0009355375060236383, "loss": 2.1836, "step": 929 }, { "epoch": 0.19, "learning_rate": 0.000935375977189491, "loss": 2.2148, "step": 930 }, { "epoch": 0.19, "learning_rate": 0.0009352142602114486, "loss": 2.1934, "step": 931 }, { "epoch": 0.19, "learning_rate": 0.0009350523551593957, "loss": 2.2422, "step": 932 }, { "epoch": 0.19, "learning_rate": 0.000934890262103298, "loss": 2.168, "step": 933 }, { "epoch": 0.19, "learning_rate": 0.0009347279811132027, "loss": 2.082, "step": 934 }, { "epoch": 0.19, "learning_rate": 0.000934565512259238, "loss": 2.2246, "step": 935 }, { "epoch": 0.19, "learning_rate": 0.0009344028556116135, "loss": 2.2012, "step": 936 }, { "epoch": 0.19, "learning_rate": 0.00093424001124062, "loss": 2.2148, "step": 937 }, { "epoch": 0.19, "learning_rate": 0.0009340769792166288, "loss": 2.2217, "step": 938 }, { "epoch": 0.19, "learning_rate": 0.000933913759610093, "loss": 2.1045, "step": 939 }, { "epoch": 0.19, "learning_rate": 0.0009337503524915467, "loss": 2.2031, "step": 940 }, { "epoch": 0.19, "learning_rate": 0.0009335867579316047, "loss": 2.1631, "step": 941 }, { "epoch": 0.19, "learning_rate": 0.0009334229760009628, "loss": 2.21, "step": 942 }, { "epoch": 0.19, "learning_rate": 0.0009332590067703981, "loss": 2.1494, "step": 943 }, { "epoch": 0.19, "learning_rate": 0.0009330948503107684, "loss": 2.2129, "step": 944 }, { "epoch": 0.19, "learning_rate": 0.0009329305066930125, "loss": 2.1816, "step": 945 }, { "epoch": 0.19, "learning_rate": 0.0009327659759881499, "loss": 2.2422, "step": 946 }, { "epoch": 0.19, "learning_rate": 0.0009326012582672814, "loss": 2.1846, "step": 947 }, { "epoch": 0.19, "learning_rate": 0.0009324363536015878, "loss": 2.2051, "step": 948 }, { "epoch": 0.19, "learning_rate": 0.0009322712620623318, "loss": 2.1348, "step": 949 }, { "epoch": 0.19, "learning_rate": 0.000932105983720856, "loss": 2.1582, "step": 950 }, { "epoch": 0.19, "learning_rate": 0.0009319405186485838, "loss": 2.1992, "step": 951 }, { "epoch": 0.19, "learning_rate": 0.0009317748669170198, "loss": 2.1504, "step": 952 }, { "epoch": 0.19, "learning_rate": 0.0009316090285977486, "loss": 2.1436, "step": 953 }, { "epoch": 0.19, "learning_rate": 0.0009314430037624362, "loss": 2.1865, "step": 954 }, { "epoch": 0.19, "learning_rate": 0.0009312767924828283, "loss": 2.168, "step": 955 }, { "epoch": 0.19, "learning_rate": 0.0009311103948307519, "loss": 2.1309, "step": 956 }, { "epoch": 0.19, "learning_rate": 0.0009309438108781142, "loss": 2.1992, "step": 957 }, { "epoch": 0.19, "learning_rate": 0.000930777040696903, "loss": 2.2051, "step": 958 }, { "epoch": 0.19, "learning_rate": 0.0009306100843591867, "loss": 2.1426, "step": 959 }, { "epoch": 0.19, "learning_rate": 0.0009304429419371139, "loss": 2.168, "step": 960 }, { "epoch": 0.2, "learning_rate": 0.0009302756135029137, "loss": 2.1934, "step": 961 }, { "epoch": 0.2, "learning_rate": 0.0009301080991288957, "loss": 2.2539, "step": 962 }, { "epoch": 0.2, "learning_rate": 0.0009299403988874495, "loss": 2.2539, "step": 963 }, { "epoch": 0.2, "learning_rate": 0.0009297725128510456, "loss": 2.0898, "step": 964 }, { "epoch": 0.2, "learning_rate": 0.0009296044410922344, "loss": 2.1396, "step": 965 }, { "epoch": 0.2, "learning_rate": 0.0009294361836836465, "loss": 2.2129, "step": 966 }, { "epoch": 0.2, "learning_rate": 0.000929267740697993, "loss": 2.1621, "step": 967 }, { "epoch": 0.2, "learning_rate": 0.0009290991122080649, "loss": 2.1719, "step": 968 }, { "epoch": 0.2, "learning_rate": 0.0009289302982867335, "loss": 2.2559, "step": 969 }, { "epoch": 0.2, "learning_rate": 0.0009287612990069508, "loss": 2.2246, "step": 970 }, { "epoch": 0.2, "learning_rate": 0.0009285921144417475, "loss": 2.2207, "step": 971 }, { "epoch": 0.2, "learning_rate": 0.0009284227446642357, "loss": 2.1514, "step": 972 }, { "epoch": 0.2, "learning_rate": 0.0009282531897476071, "loss": 2.2324, "step": 973 }, { "epoch": 0.2, "learning_rate": 0.0009280834497651332, "loss": 2.2383, "step": 974 }, { "epoch": 0.2, "learning_rate": 0.0009279135247901658, "loss": 2.1953, "step": 975 }, { "epoch": 0.2, "learning_rate": 0.0009277434148961363, "loss": 2.1934, "step": 976 }, { "epoch": 0.2, "learning_rate": 0.0009275731201565563, "loss": 2.25, "step": 977 }, { "epoch": 0.2, "learning_rate": 0.000927402640645017, "loss": 2.1758, "step": 978 }, { "epoch": 0.2, "learning_rate": 0.0009272319764351899, "loss": 2.1758, "step": 979 }, { "epoch": 0.2, "learning_rate": 0.0009270611276008258, "loss": 2.2168, "step": 980 }, { "epoch": 0.2, "learning_rate": 0.0009268900942157556, "loss": 2.1963, "step": 981 }, { "epoch": 0.2, "learning_rate": 0.00092671887635389, "loss": 2.2139, "step": 982 }, { "epoch": 0.2, "learning_rate": 0.0009265474740892192, "loss": 2.1855, "step": 983 }, { "epoch": 0.2, "learning_rate": 0.0009263758874958131, "loss": 2.2598, "step": 984 }, { "epoch": 0.2, "learning_rate": 0.0009262041166478213, "loss": 2.1045, "step": 985 }, { "epoch": 0.2, "learning_rate": 0.0009260321616194733, "loss": 2.1855, "step": 986 }, { "epoch": 0.2, "learning_rate": 0.0009258600224850777, "loss": 2.2285, "step": 987 }, { "epoch": 0.2, "learning_rate": 0.0009256876993190231, "loss": 2.1133, "step": 988 }, { "epoch": 0.2, "learning_rate": 0.0009255151921957772, "loss": 2.1309, "step": 989 }, { "epoch": 0.2, "learning_rate": 0.0009253425011898878, "loss": 2.1729, "step": 990 }, { "epoch": 0.2, "learning_rate": 0.0009251696263759815, "loss": 2.1289, "step": 991 }, { "epoch": 0.2, "learning_rate": 0.0009249965678287647, "loss": 2.1982, "step": 992 }, { "epoch": 0.2, "learning_rate": 0.000924823325623023, "loss": 2.2012, "step": 993 }, { "epoch": 0.2, "learning_rate": 0.0009246498998336218, "loss": 2.2754, "step": 994 }, { "epoch": 0.2, "learning_rate": 0.0009244762905355053, "loss": 2.1396, "step": 995 }, { "epoch": 0.2, "learning_rate": 0.0009243024978036974, "loss": 2.1357, "step": 996 }, { "epoch": 0.2, "learning_rate": 0.0009241285217133009, "loss": 2.2344, "step": 997 }, { "epoch": 0.2, "learning_rate": 0.0009239543623394982, "loss": 2.1309, "step": 998 }, { "epoch": 0.2, "learning_rate": 0.0009237800197575505, "loss": 2.252, "step": 999 }, { "epoch": 0.2, "learning_rate": 0.0009236054940427987, "loss": 2.2266, "step": 1000 }, { "epoch": 0.2, "learning_rate": 0.0009234307852706624, "loss": 2.252, "step": 1001 }, { "epoch": 0.2, "learning_rate": 0.0009232558935166407, "loss": 2.293, "step": 1002 }, { "epoch": 0.2, "learning_rate": 0.0009230808188563109, "loss": 2.2139, "step": 1003 }, { "epoch": 0.2, "learning_rate": 0.0009229055613653308, "loss": 2.168, "step": 1004 }, { "epoch": 0.2, "learning_rate": 0.0009227301211194356, "loss": 2.126, "step": 1005 }, { "epoch": 0.2, "learning_rate": 0.0009225544981944409, "loss": 2.2656, "step": 1006 }, { "epoch": 0.2, "learning_rate": 0.0009223786926662403, "loss": 2.207, "step": 1007 }, { "epoch": 0.2, "learning_rate": 0.0009222027046108066, "loss": 2.2266, "step": 1008 }, { "epoch": 0.2, "learning_rate": 0.0009220265341041916, "loss": 2.2754, "step": 1009 }, { "epoch": 0.2, "learning_rate": 0.0009218501812225257, "loss": 2.1475, "step": 1010 }, { "epoch": 0.21, "learning_rate": 0.0009216736460420183, "loss": 2.2188, "step": 1011 }, { "epoch": 0.21, "learning_rate": 0.0009214969286389577, "loss": 2.1699, "step": 1012 }, { "epoch": 0.21, "learning_rate": 0.0009213200290897103, "loss": 2.1328, "step": 1013 }, { "epoch": 0.21, "learning_rate": 0.0009211429474707221, "loss": 2.1699, "step": 1014 }, { "epoch": 0.21, "learning_rate": 0.0009209656838585174, "loss": 2.1494, "step": 1015 }, { "epoch": 0.21, "learning_rate": 0.0009207882383296988, "loss": 2.1289, "step": 1016 }, { "epoch": 0.21, "learning_rate": 0.000920610610960948, "loss": 2.2363, "step": 1017 }, { "epoch": 0.21, "learning_rate": 0.0009204328018290252, "loss": 2.209, "step": 1018 }, { "epoch": 0.21, "learning_rate": 0.0009202548110107688, "loss": 2.207, "step": 1019 }, { "epoch": 0.21, "learning_rate": 0.0009200766385830962, "loss": 2.1328, "step": 1020 }, { "epoch": 0.21, "learning_rate": 0.0009198982846230028, "loss": 2.1846, "step": 1021 }, { "epoch": 0.21, "learning_rate": 0.0009197197492075631, "loss": 2.2207, "step": 1022 }, { "epoch": 0.21, "learning_rate": 0.000919541032413929, "loss": 2.1426, "step": 1023 }, { "epoch": 0.21, "learning_rate": 0.000919362134319332, "loss": 2.127, "step": 1024 }, { "epoch": 0.21, "learning_rate": 0.0009191830550010811, "loss": 2.1318, "step": 1025 }, { "epoch": 0.21, "learning_rate": 0.0009190037945365637, "loss": 2.1836, "step": 1026 }, { "epoch": 0.21, "learning_rate": 0.0009188243530032457, "loss": 2.127, "step": 1027 }, { "epoch": 0.21, "learning_rate": 0.0009186447304786714, "loss": 2.2217, "step": 1028 }, { "epoch": 0.21, "learning_rate": 0.0009184649270404628, "loss": 2.1367, "step": 1029 }, { "epoch": 0.21, "learning_rate": 0.0009182849427663204, "loss": 2.1348, "step": 1030 }, { "epoch": 0.21, "learning_rate": 0.0009181047777340232, "loss": 2.2539, "step": 1031 }, { "epoch": 0.21, "learning_rate": 0.0009179244320214275, "loss": 2.2129, "step": 1032 }, { "epoch": 0.21, "learning_rate": 0.0009177439057064682, "loss": 2.209, "step": 1033 }, { "epoch": 0.21, "learning_rate": 0.0009175631988671583, "loss": 2.2383, "step": 1034 }, { "epoch": 0.21, "learning_rate": 0.0009173823115815886, "loss": 2.1582, "step": 1035 }, { "epoch": 0.21, "learning_rate": 0.000917201243927928, "loss": 2.2207, "step": 1036 }, { "epoch": 0.21, "learning_rate": 0.0009170199959844231, "loss": 2.1885, "step": 1037 }, { "epoch": 0.21, "learning_rate": 0.0009168385678293986, "loss": 2.1562, "step": 1038 }, { "epoch": 0.21, "learning_rate": 0.0009166569595412575, "loss": 2.1729, "step": 1039 }, { "epoch": 0.21, "learning_rate": 0.0009164751711984798, "loss": 2.0889, "step": 1040 }, { "epoch": 0.21, "learning_rate": 0.0009162932028796239, "loss": 2.1074, "step": 1041 }, { "epoch": 0.21, "learning_rate": 0.0009161110546633258, "loss": 2.2363, "step": 1042 }, { "epoch": 0.21, "learning_rate": 0.0009159287266282992, "loss": 2.1846, "step": 1043 }, { "epoch": 0.21, "learning_rate": 0.0009157462188533356, "loss": 2.2168, "step": 1044 }, { "epoch": 0.21, "learning_rate": 0.0009155635314173039, "loss": 2.1318, "step": 1045 }, { "epoch": 0.21, "learning_rate": 0.0009153806643991514, "loss": 2.1289, "step": 1046 }, { "epoch": 0.21, "learning_rate": 0.0009151976178779019, "loss": 2.1719, "step": 1047 }, { "epoch": 0.21, "learning_rate": 0.0009150143919326577, "loss": 2.1641, "step": 1048 }, { "epoch": 0.21, "learning_rate": 0.0009148309866425981, "loss": 2.2051, "step": 1049 }, { "epoch": 0.21, "learning_rate": 0.0009146474020869801, "loss": 2.167, "step": 1050 }, { "epoch": 0.21, "learning_rate": 0.0009144636383451384, "loss": 2.1445, "step": 1051 }, { "epoch": 0.21, "learning_rate": 0.0009142796954964845, "loss": 2.0801, "step": 1052 }, { "epoch": 0.21, "learning_rate": 0.0009140955736205078, "loss": 2.1484, "step": 1053 }, { "epoch": 0.21, "learning_rate": 0.0009139112727967751, "loss": 2.1602, "step": 1054 }, { "epoch": 0.21, "learning_rate": 0.0009137267931049304, "loss": 2.1631, "step": 1055 }, { "epoch": 0.21, "learning_rate": 0.0009135421346246948, "loss": 2.166, "step": 1056 }, { "epoch": 0.21, "learning_rate": 0.0009133572974358668, "loss": 2.1621, "step": 1057 }, { "epoch": 0.21, "learning_rate": 0.0009131722816183224, "loss": 2.1328, "step": 1058 }, { "epoch": 0.21, "learning_rate": 0.0009129870872520143, "loss": 2.2051, "step": 1059 }, { "epoch": 0.22, "learning_rate": 0.0009128017144169727, "loss": 2.1289, "step": 1060 }, { "epoch": 0.22, "learning_rate": 0.000912616163193305, "loss": 2.2285, "step": 1061 }, { "epoch": 0.22, "learning_rate": 0.0009124304336611953, "loss": 2.1562, "step": 1062 }, { "epoch": 0.22, "learning_rate": 0.0009122445259009052, "loss": 2.1445, "step": 1063 }, { "epoch": 0.22, "learning_rate": 0.0009120584399927727, "loss": 2.2148, "step": 1064 }, { "epoch": 0.22, "learning_rate": 0.0009118721760172137, "loss": 2.1367, "step": 1065 }, { "epoch": 0.22, "learning_rate": 0.0009116857340547202, "loss": 2.1816, "step": 1066 }, { "epoch": 0.22, "learning_rate": 0.0009114991141858617, "loss": 2.166, "step": 1067 }, { "epoch": 0.22, "learning_rate": 0.0009113123164912842, "loss": 2.2207, "step": 1068 }, { "epoch": 0.22, "learning_rate": 0.0009111253410517106, "loss": 2.1602, "step": 1069 }, { "epoch": 0.22, "learning_rate": 0.0009109381879479407, "loss": 2.2109, "step": 1070 }, { "epoch": 0.22, "learning_rate": 0.0009107508572608512, "loss": 2.2695, "step": 1071 }, { "epoch": 0.22, "learning_rate": 0.0009105633490713952, "loss": 2.168, "step": 1072 }, { "epoch": 0.22, "learning_rate": 0.0009103756634606029, "loss": 2.1709, "step": 1073 }, { "epoch": 0.22, "learning_rate": 0.0009101878005095812, "loss": 2.207, "step": 1074 }, { "epoch": 0.22, "learning_rate": 0.0009099997602995128, "loss": 2.1494, "step": 1075 }, { "epoch": 0.22, "learning_rate": 0.0009098115429116582, "loss": 2.1934, "step": 1076 }, { "epoch": 0.22, "learning_rate": 0.0009096231484273533, "loss": 2.1074, "step": 1077 }, { "epoch": 0.22, "learning_rate": 0.0009094345769280119, "loss": 2.1846, "step": 1078 }, { "epoch": 0.22, "learning_rate": 0.0009092458284951229, "loss": 2.1885, "step": 1079 }, { "epoch": 0.22, "learning_rate": 0.0009090569032102525, "loss": 2.1738, "step": 1080 }, { "epoch": 0.22, "learning_rate": 0.0009088678011550431, "loss": 2.2227, "step": 1081 }, { "epoch": 0.22, "learning_rate": 0.0009086785224112132, "loss": 2.1924, "step": 1082 }, { "epoch": 0.22, "learning_rate": 0.0009084890670605586, "loss": 2.1699, "step": 1083 }, { "epoch": 0.22, "learning_rate": 0.00090829943518495, "loss": 2.1953, "step": 1084 }, { "epoch": 0.22, "learning_rate": 0.0009081096268663358, "loss": 2.2031, "step": 1085 }, { "epoch": 0.22, "learning_rate": 0.0009079196421867395, "loss": 2.2197, "step": 1086 }, { "epoch": 0.22, "learning_rate": 0.0009077294812282615, "loss": 2.1641, "step": 1087 }, { "epoch": 0.22, "learning_rate": 0.0009075391440730783, "loss": 2.165, "step": 1088 }, { "epoch": 0.22, "learning_rate": 0.0009073486308034422, "loss": 2.21, "step": 1089 }, { "epoch": 0.22, "learning_rate": 0.000907157941501682, "loss": 2.168, "step": 1090 }, { "epoch": 0.22, "learning_rate": 0.0009069670762502023, "loss": 2.1162, "step": 1091 }, { "epoch": 0.22, "learning_rate": 0.0009067760351314837, "loss": 2.25, "step": 1092 }, { "epoch": 0.22, "learning_rate": 0.0009065848182280834, "loss": 2.1426, "step": 1093 }, { "epoch": 0.22, "learning_rate": 0.0009063934256226337, "loss": 2.1377, "step": 1094 }, { "epoch": 0.22, "learning_rate": 0.0009062018573978434, "loss": 2.2217, "step": 1095 }, { "epoch": 0.22, "learning_rate": 0.000906010113636497, "loss": 2.1826, "step": 1096 }, { "epoch": 0.22, "learning_rate": 0.0009058181944214547, "loss": 2.2461, "step": 1097 }, { "epoch": 0.22, "learning_rate": 0.000905626099835653, "loss": 2.1875, "step": 1098 }, { "epoch": 0.22, "learning_rate": 0.0009054338299621037, "loss": 2.2598, "step": 1099 }, { "epoch": 0.22, "learning_rate": 0.0009052413848838945, "loss": 2.1641, "step": 1100 }, { "epoch": 0.22, "learning_rate": 0.000905048764684189, "loss": 2.124, "step": 1101 }, { "epoch": 0.22, "learning_rate": 0.0009048559694462262, "loss": 2.0977, "step": 1102 }, { "epoch": 0.22, "learning_rate": 0.0009046629992533208, "loss": 2.1211, "step": 1103 }, { "epoch": 0.22, "learning_rate": 0.0009044698541888634, "loss": 2.1387, "step": 1104 }, { "epoch": 0.22, "learning_rate": 0.0009042765343363197, "loss": 2.1904, "step": 1105 }, { "epoch": 0.22, "learning_rate": 0.0009040830397792313, "loss": 2.1621, "step": 1106 }, { "epoch": 0.22, "learning_rate": 0.0009038893706012152, "loss": 2.1719, "step": 1107 }, { "epoch": 0.22, "learning_rate": 0.0009036955268859638, "loss": 2.1582, "step": 1108 }, { "epoch": 0.23, "learning_rate": 0.0009035015087172449, "loss": 2.0938, "step": 1109 }, { "epoch": 0.23, "learning_rate": 0.0009033073161789019, "loss": 2.0957, "step": 1110 }, { "epoch": 0.23, "learning_rate": 0.0009031129493548529, "loss": 2.1377, "step": 1111 }, { "epoch": 0.23, "learning_rate": 0.0009029184083290926, "loss": 2.2402, "step": 1112 }, { "epoch": 0.23, "learning_rate": 0.0009027236931856894, "loss": 2.2217, "step": 1113 }, { "epoch": 0.23, "learning_rate": 0.0009025288040087885, "loss": 2.1719, "step": 1114 }, { "epoch": 0.23, "learning_rate": 0.0009023337408826089, "loss": 2.2148, "step": 1115 }, { "epoch": 0.23, "learning_rate": 0.0009021385038914457, "loss": 2.1934, "step": 1116 }, { "epoch": 0.23, "learning_rate": 0.0009019430931196689, "loss": 2.1855, "step": 1117 }, { "epoch": 0.23, "learning_rate": 0.0009017475086517233, "loss": 2.2402, "step": 1118 }, { "epoch": 0.23, "learning_rate": 0.0009015517505721293, "loss": 2.1426, "step": 1119 }, { "epoch": 0.23, "learning_rate": 0.0009013558189654818, "loss": 2.1943, "step": 1120 }, { "epoch": 0.23, "learning_rate": 0.0009011597139164511, "loss": 2.1611, "step": 1121 }, { "epoch": 0.23, "learning_rate": 0.0009009634355097822, "loss": 2.1621, "step": 1122 }, { "epoch": 0.23, "learning_rate": 0.000900766983830295, "loss": 2.1074, "step": 1123 }, { "epoch": 0.23, "learning_rate": 0.0009005703589628844, "loss": 2.2559, "step": 1124 }, { "epoch": 0.23, "learning_rate": 0.0009003735609925201, "loss": 2.1104, "step": 1125 }, { "epoch": 0.23, "learning_rate": 0.0009001765900042468, "loss": 2.2402, "step": 1126 }, { "epoch": 0.23, "learning_rate": 0.0008999794460831835, "loss": 2.0957, "step": 1127 }, { "epoch": 0.23, "learning_rate": 0.0008997821293145243, "loss": 2.1191, "step": 1128 }, { "epoch": 0.23, "learning_rate": 0.0008995846397835381, "loss": 2.1875, "step": 1129 }, { "epoch": 0.23, "learning_rate": 0.000899386977575568, "loss": 2.2031, "step": 1130 }, { "epoch": 0.23, "learning_rate": 0.0008991891427760321, "loss": 2.1602, "step": 1131 }, { "epoch": 0.23, "learning_rate": 0.0008989911354704229, "loss": 2.0957, "step": 1132 }, { "epoch": 0.23, "learning_rate": 0.0008987929557443075, "loss": 2.1689, "step": 1133 }, { "epoch": 0.23, "learning_rate": 0.0008985946036833278, "loss": 2.1768, "step": 1134 }, { "epoch": 0.23, "learning_rate": 0.0008983960793731995, "loss": 2.0723, "step": 1135 }, { "epoch": 0.23, "learning_rate": 0.0008981973828997134, "loss": 2.1279, "step": 1136 }, { "epoch": 0.23, "learning_rate": 0.0008979985143487344, "loss": 2.0947, "step": 1137 }, { "epoch": 0.23, "learning_rate": 0.0008977994738062016, "loss": 2.1426, "step": 1138 }, { "epoch": 0.23, "learning_rate": 0.0008976002613581288, "loss": 2.1104, "step": 1139 }, { "epoch": 0.23, "learning_rate": 0.0008974008770906039, "loss": 2.2246, "step": 1140 }, { "epoch": 0.23, "learning_rate": 0.000897201321089789, "loss": 2.1729, "step": 1141 }, { "epoch": 0.23, "learning_rate": 0.0008970015934419204, "loss": 2.207, "step": 1142 }, { "epoch": 0.23, "learning_rate": 0.000896801694233309, "loss": 2.2168, "step": 1143 }, { "epoch": 0.23, "learning_rate": 0.0008966016235503389, "loss": 2.082, "step": 1144 }, { "epoch": 0.23, "learning_rate": 0.0008964013814794693, "loss": 2.2129, "step": 1145 }, { "epoch": 0.23, "learning_rate": 0.0008962009681072332, "loss": 2.1328, "step": 1146 }, { "epoch": 0.23, "learning_rate": 0.0008960003835202369, "loss": 2.1426, "step": 1147 }, { "epoch": 0.23, "learning_rate": 0.0008957996278051617, "loss": 2.1074, "step": 1148 }, { "epoch": 0.23, "learning_rate": 0.0008955987010487623, "loss": 2.1729, "step": 1149 }, { "epoch": 0.23, "learning_rate": 0.0008953976033378674, "loss": 2.1592, "step": 1150 }, { "epoch": 0.23, "learning_rate": 0.0008951963347593796, "loss": 2.2344, "step": 1151 }, { "epoch": 0.23, "learning_rate": 0.0008949948954002756, "loss": 2.168, "step": 1152 }, { "epoch": 0.23, "learning_rate": 0.0008947932853476051, "loss": 2.1543, "step": 1153 }, { "epoch": 0.23, "learning_rate": 0.0008945915046884926, "loss": 2.1836, "step": 1154 }, { "epoch": 0.23, "learning_rate": 0.0008943895535101356, "loss": 2.1963, "step": 1155 }, { "epoch": 0.23, "learning_rate": 0.0008941874318998057, "loss": 2.1328, "step": 1156 }, { "epoch": 0.23, "learning_rate": 0.0008939851399448477, "loss": 2.1855, "step": 1157 }, { "epoch": 0.24, "learning_rate": 0.0008937826777326807, "loss": 2.1143, "step": 1158 }, { "epoch": 0.24, "learning_rate": 0.0008935800453507965, "loss": 2.1875, "step": 1159 }, { "epoch": 0.24, "learning_rate": 0.0008933772428867613, "loss": 2.1621, "step": 1160 }, { "epoch": 0.24, "learning_rate": 0.0008931742704282139, "loss": 2.1875, "step": 1161 }, { "epoch": 0.24, "learning_rate": 0.0008929711280628676, "loss": 2.2793, "step": 1162 }, { "epoch": 0.24, "learning_rate": 0.0008927678158785085, "loss": 2.2109, "step": 1163 }, { "epoch": 0.24, "learning_rate": 0.0008925643339629957, "loss": 2.1748, "step": 1164 }, { "epoch": 0.24, "learning_rate": 0.0008923606824042627, "loss": 2.0762, "step": 1165 }, { "epoch": 0.24, "learning_rate": 0.0008921568612903153, "loss": 2.168, "step": 1166 }, { "epoch": 0.24, "learning_rate": 0.0008919528707092332, "loss": 2.124, "step": 1167 }, { "epoch": 0.24, "learning_rate": 0.0008917487107491693, "loss": 2.2021, "step": 1168 }, { "epoch": 0.24, "learning_rate": 0.000891544381498349, "loss": 2.209, "step": 1169 }, { "epoch": 0.24, "learning_rate": 0.0008913398830450721, "loss": 2.1816, "step": 1170 }, { "epoch": 0.24, "learning_rate": 0.0008911352154777101, "loss": 2.1162, "step": 1171 }, { "epoch": 0.24, "learning_rate": 0.0008909303788847087, "loss": 2.209, "step": 1172 }, { "epoch": 0.24, "learning_rate": 0.0008907253733545864, "loss": 2.1768, "step": 1173 }, { "epoch": 0.24, "learning_rate": 0.000890520198975934, "loss": 2.1387, "step": 1174 }, { "epoch": 0.24, "learning_rate": 0.0008903148558374162, "loss": 2.1455, "step": 1175 }, { "epoch": 0.24, "learning_rate": 0.0008901093440277701, "loss": 2.1836, "step": 1176 }, { "epoch": 0.24, "learning_rate": 0.0008899036636358058, "loss": 2.2324, "step": 1177 }, { "epoch": 0.24, "learning_rate": 0.0008896978147504063, "loss": 2.1836, "step": 1178 }, { "epoch": 0.24, "learning_rate": 0.0008894917974605275, "loss": 2.2402, "step": 1179 }, { "epoch": 0.24, "learning_rate": 0.0008892856118551978, "loss": 2.1152, "step": 1180 }, { "epoch": 0.24, "learning_rate": 0.0008890792580235183, "loss": 2.1367, "step": 1181 }, { "epoch": 0.24, "learning_rate": 0.0008888727360546635, "loss": 2.041, "step": 1182 }, { "epoch": 0.24, "learning_rate": 0.0008886660460378798, "loss": 2.1523, "step": 1183 }, { "epoch": 0.24, "learning_rate": 0.0008884591880624861, "loss": 2.1709, "step": 1184 }, { "epoch": 0.24, "learning_rate": 0.000888252162217875, "loss": 2.1924, "step": 1185 }, { "epoch": 0.24, "learning_rate": 0.0008880449685935102, "loss": 2.1895, "step": 1186 }, { "epoch": 0.24, "learning_rate": 0.0008878376072789291, "loss": 2.1699, "step": 1187 }, { "epoch": 0.24, "learning_rate": 0.0008876300783637408, "loss": 2.1748, "step": 1188 }, { "epoch": 0.24, "learning_rate": 0.0008874223819376272, "loss": 2.1797, "step": 1189 }, { "epoch": 0.24, "learning_rate": 0.0008872145180903423, "loss": 2.1641, "step": 1190 }, { "epoch": 0.24, "learning_rate": 0.000887006486911713, "loss": 2.2324, "step": 1191 }, { "epoch": 0.24, "learning_rate": 0.0008867982884916376, "loss": 2.2539, "step": 1192 }, { "epoch": 0.24, "learning_rate": 0.0008865899229200878, "loss": 2.0557, "step": 1193 }, { "epoch": 0.24, "learning_rate": 0.0008863813902871067, "loss": 2.1943, "step": 1194 }, { "epoch": 0.24, "learning_rate": 0.0008861726906828096, "loss": 2.2246, "step": 1195 }, { "epoch": 0.24, "learning_rate": 0.0008859638241973845, "loss": 2.1816, "step": 1196 }, { "epoch": 0.24, "learning_rate": 0.0008857547909210912, "loss": 2.2002, "step": 1197 }, { "epoch": 0.24, "learning_rate": 0.0008855455909442615, "loss": 2.0742, "step": 1198 }, { "epoch": 0.24, "learning_rate": 0.0008853362243572994, "loss": 2.2871, "step": 1199 }, { "epoch": 0.24, "learning_rate": 0.0008851266912506806, "loss": 2.1484, "step": 1200 }, { "epoch": 0.24, "learning_rate": 0.0008849169917149531, "loss": 2.1758, "step": 1201 }, { "epoch": 0.24, "learning_rate": 0.000884707125840737, "loss": 2.1484, "step": 1202 }, { "epoch": 0.24, "learning_rate": 0.0008844970937187236, "loss": 2.1328, "step": 1203 }, { "epoch": 0.24, "learning_rate": 0.0008842868954396766, "loss": 2.1035, "step": 1204 }, { "epoch": 0.24, "learning_rate": 0.0008840765310944315, "loss": 2.0947, "step": 1205 }, { "epoch": 0.24, "learning_rate": 0.000883866000773895, "loss": 2.1201, "step": 1206 }, { "epoch": 0.24, "learning_rate": 0.0008836553045690462, "loss": 2.2061, "step": 1207 }, { "epoch": 0.25, "learning_rate": 0.0008834444425709356, "loss": 2.1641, "step": 1208 }, { "epoch": 0.25, "learning_rate": 0.0008832334148706853, "loss": 2.1201, "step": 1209 }, { "epoch": 0.25, "learning_rate": 0.000883022221559489, "loss": 2.2051, "step": 1210 }, { "epoch": 0.25, "learning_rate": 0.0008828108627286122, "loss": 2.165, "step": 1211 }, { "epoch": 0.25, "learning_rate": 0.0008825993384693917, "loss": 2.1582, "step": 1212 }, { "epoch": 0.25, "learning_rate": 0.0008823876488732357, "loss": 2.1953, "step": 1213 }, { "epoch": 0.25, "learning_rate": 0.0008821757940316242, "loss": 2.1133, "step": 1214 }, { "epoch": 0.25, "learning_rate": 0.0008819637740361081, "loss": 2.1816, "step": 1215 }, { "epoch": 0.25, "learning_rate": 0.0008817515889783105, "loss": 2.1387, "step": 1216 }, { "epoch": 0.25, "learning_rate": 0.0008815392389499248, "loss": 2.3008, "step": 1217 }, { "epoch": 0.25, "learning_rate": 0.0008813267240427164, "loss": 2.2305, "step": 1218 }, { "epoch": 0.25, "learning_rate": 0.0008811140443485217, "loss": 2.1758, "step": 1219 }, { "epoch": 0.25, "learning_rate": 0.0008809011999592484, "loss": 2.1562, "step": 1220 }, { "epoch": 0.25, "learning_rate": 0.0008806881909668754, "loss": 2.165, "step": 1221 }, { "epoch": 0.25, "learning_rate": 0.0008804750174634526, "loss": 2.0996, "step": 1222 }, { "epoch": 0.25, "learning_rate": 0.0008802616795411009, "loss": 2.1123, "step": 1223 }, { "epoch": 0.25, "learning_rate": 0.0008800481772920126, "loss": 2.123, "step": 1224 }, { "epoch": 0.25, "learning_rate": 0.0008798345108084507, "loss": 2.1426, "step": 1225 }, { "epoch": 0.25, "learning_rate": 0.0008796206801827493, "loss": 2.1211, "step": 1226 }, { "epoch": 0.25, "learning_rate": 0.0008794066855073135, "loss": 2.1484, "step": 1227 }, { "epoch": 0.25, "learning_rate": 0.0008791925268746192, "loss": 2.1973, "step": 1228 }, { "epoch": 0.25, "learning_rate": 0.0008789782043772132, "loss": 2.1445, "step": 1229 }, { "epoch": 0.25, "learning_rate": 0.0008787637181077129, "loss": 2.1885, "step": 1230 }, { "epoch": 0.25, "learning_rate": 0.0008785490681588069, "loss": 2.1982, "step": 1231 }, { "epoch": 0.25, "learning_rate": 0.000878334254623254, "loss": 2.2021, "step": 1232 }, { "epoch": 0.25, "learning_rate": 0.0008781192775938843, "loss": 2.1572, "step": 1233 }, { "epoch": 0.25, "learning_rate": 0.0008779041371635981, "loss": 2.1016, "step": 1234 }, { "epoch": 0.25, "learning_rate": 0.0008776888334253662, "loss": 2.2422, "step": 1235 }, { "epoch": 0.25, "learning_rate": 0.0008774733664722305, "loss": 2.207, "step": 1236 }, { "epoch": 0.25, "learning_rate": 0.0008772577363973033, "loss": 2.2148, "step": 1237 }, { "epoch": 0.25, "learning_rate": 0.0008770419432937667, "loss": 2.2227, "step": 1238 }, { "epoch": 0.25, "learning_rate": 0.0008768259872548742, "loss": 2.1787, "step": 1239 }, { "epoch": 0.25, "learning_rate": 0.0008766098683739494, "loss": 2.1357, "step": 1240 }, { "epoch": 0.25, "learning_rate": 0.0008763935867443858, "loss": 2.251, "step": 1241 }, { "epoch": 0.25, "learning_rate": 0.0008761771424596476, "loss": 2.168, "step": 1242 }, { "epoch": 0.25, "learning_rate": 0.0008759605356132698, "loss": 2.2148, "step": 1243 }, { "epoch": 0.25, "learning_rate": 0.0008757437662988566, "loss": 2.3203, "step": 1244 }, { "epoch": 0.25, "learning_rate": 0.0008755268346100834, "loss": 2.126, "step": 1245 }, { "epoch": 0.25, "learning_rate": 0.0008753097406406948, "loss": 2.1035, "step": 1246 }, { "epoch": 0.25, "learning_rate": 0.0008750924844845064, "loss": 2.126, "step": 1247 }, { "epoch": 0.25, "learning_rate": 0.0008748750662354034, "loss": 2.1631, "step": 1248 }, { "epoch": 0.25, "learning_rate": 0.0008746574859873416, "loss": 2.1133, "step": 1249 }, { "epoch": 0.25, "learning_rate": 0.0008744397438343456, "loss": 2.1738, "step": 1250 }, { "epoch": 0.25, "learning_rate": 0.0008742218398705114, "loss": 2.1738, "step": 1251 }, { "epoch": 0.25, "learning_rate": 0.000874003774190004, "loss": 2.1045, "step": 1252 }, { "epoch": 0.25, "learning_rate": 0.0008737855468870587, "loss": 2.1621, "step": 1253 }, { "epoch": 0.25, "learning_rate": 0.0008735671580559803, "loss": 2.1182, "step": 1254 }, { "epoch": 0.25, "learning_rate": 0.0008733486077911439, "loss": 2.1523, "step": 1255 }, { "epoch": 0.25, "learning_rate": 0.0008731298961869938, "loss": 2.1348, "step": 1256 }, { "epoch": 0.26, "learning_rate": 0.0008729110233380445, "loss": 2.1084, "step": 1257 }, { "epoch": 0.26, "learning_rate": 0.0008726919893388798, "loss": 2.1895, "step": 1258 }, { "epoch": 0.26, "learning_rate": 0.0008724727942841535, "loss": 2.1768, "step": 1259 }, { "epoch": 0.26, "learning_rate": 0.0008722534382685888, "loss": 2.1094, "step": 1260 }, { "epoch": 0.26, "learning_rate": 0.0008720339213869781, "loss": 2.1084, "step": 1261 }, { "epoch": 0.26, "learning_rate": 0.0008718142437341843, "loss": 2.1826, "step": 1262 }, { "epoch": 0.26, "learning_rate": 0.0008715944054051386, "loss": 2.1699, "step": 1263 }, { "epoch": 0.26, "learning_rate": 0.0008713744064948425, "loss": 2.1504, "step": 1264 }, { "epoch": 0.26, "learning_rate": 0.0008711542470983667, "loss": 2.1396, "step": 1265 }, { "epoch": 0.26, "learning_rate": 0.0008709339273108507, "loss": 2.1709, "step": 1266 }, { "epoch": 0.26, "learning_rate": 0.0008707134472275041, "loss": 2.0762, "step": 1267 }, { "epoch": 0.26, "learning_rate": 0.0008704928069436054, "loss": 2.1533, "step": 1268 }, { "epoch": 0.26, "learning_rate": 0.0008702720065545023, "loss": 2.2275, "step": 1269 }, { "epoch": 0.26, "learning_rate": 0.0008700510461556117, "loss": 2.0713, "step": 1270 }, { "epoch": 0.26, "learning_rate": 0.0008698299258424198, "loss": 2.1641, "step": 1271 }, { "epoch": 0.26, "learning_rate": 0.000869608645710482, "loss": 2.1396, "step": 1272 }, { "epoch": 0.26, "learning_rate": 0.000869387205855422, "loss": 2.1562, "step": 1273 }, { "epoch": 0.26, "learning_rate": 0.0008691656063729334, "loss": 2.2041, "step": 1274 }, { "epoch": 0.26, "learning_rate": 0.0008689438473587785, "loss": 2.1533, "step": 1275 }, { "epoch": 0.26, "learning_rate": 0.0008687219289087885, "loss": 2.1348, "step": 1276 }, { "epoch": 0.26, "learning_rate": 0.0008684998511188633, "loss": 2.1895, "step": 1277 }, { "epoch": 0.26, "learning_rate": 0.0008682776140849722, "loss": 2.2139, "step": 1278 }, { "epoch": 0.26, "learning_rate": 0.0008680552179031527, "loss": 2.1562, "step": 1279 }, { "epoch": 0.26, "learning_rate": 0.0008678326626695114, "loss": 2.1914, "step": 1280 }, { "epoch": 0.26, "learning_rate": 0.0008676099484802238, "loss": 2.0693, "step": 1281 }, { "epoch": 0.26, "learning_rate": 0.0008673870754315336, "loss": 2.1348, "step": 1282 }, { "epoch": 0.26, "learning_rate": 0.0008671640436197537, "loss": 2.1377, "step": 1283 }, { "epoch": 0.26, "learning_rate": 0.0008669408531412651, "loss": 2.2422, "step": 1284 }, { "epoch": 0.26, "learning_rate": 0.0008667175040925174, "loss": 2.1484, "step": 1285 }, { "epoch": 0.26, "learning_rate": 0.0008664939965700296, "loss": 2.1826, "step": 1286 }, { "epoch": 0.26, "learning_rate": 0.0008662703306703878, "loss": 2.125, "step": 1287 }, { "epoch": 0.26, "learning_rate": 0.0008660465064902477, "loss": 2.1084, "step": 1288 }, { "epoch": 0.26, "learning_rate": 0.0008658225241263329, "loss": 2.1885, "step": 1289 }, { "epoch": 0.26, "learning_rate": 0.000865598383675435, "loss": 2.0977, "step": 1290 }, { "epoch": 0.26, "learning_rate": 0.0008653740852344149, "loss": 2.1807, "step": 1291 }, { "epoch": 0.26, "learning_rate": 0.000865149628900201, "loss": 2.248, "step": 1292 }, { "epoch": 0.26, "learning_rate": 0.0008649250147697898, "loss": 2.1592, "step": 1293 }, { "epoch": 0.26, "learning_rate": 0.0008647002429402467, "loss": 2.1162, "step": 1294 }, { "epoch": 0.26, "learning_rate": 0.0008644753135087047, "loss": 2.1621, "step": 1295 }, { "epoch": 0.26, "learning_rate": 0.0008642502265723653, "loss": 2.1377, "step": 1296 }, { "epoch": 0.26, "learning_rate": 0.0008640249822284975, "loss": 2.1729, "step": 1297 }, { "epoch": 0.26, "learning_rate": 0.0008637995805744387, "loss": 2.0908, "step": 1298 }, { "epoch": 0.26, "learning_rate": 0.0008635740217075946, "loss": 2.1494, "step": 1299 }, { "epoch": 0.26, "learning_rate": 0.0008633483057254381, "loss": 2.2344, "step": 1300 }, { "epoch": 0.26, "learning_rate": 0.0008631224327255105, "loss": 2.0938, "step": 1301 }, { "epoch": 0.26, "learning_rate": 0.0008628964028054208, "loss": 2.1123, "step": 1302 }, { "epoch": 0.26, "learning_rate": 0.000862670216062846, "loss": 2.1162, "step": 1303 }, { "epoch": 0.26, "learning_rate": 0.0008624438725955305, "loss": 2.0459, "step": 1304 }, { "epoch": 0.26, "learning_rate": 0.0008622173725012867, "loss": 2.124, "step": 1305 }, { "epoch": 0.27, "learning_rate": 0.0008619907158779947, "loss": 2.0742, "step": 1306 }, { "epoch": 0.27, "learning_rate": 0.0008617639028236021, "loss": 2.2012, "step": 1307 }, { "epoch": 0.27, "learning_rate": 0.0008615369334361241, "loss": 2.1504, "step": 1308 }, { "epoch": 0.27, "learning_rate": 0.0008613098078136437, "loss": 2.1992, "step": 1309 }, { "epoch": 0.27, "learning_rate": 0.0008610825260543109, "loss": 2.084, "step": 1310 }, { "epoch": 0.27, "learning_rate": 0.000860855088256344, "loss": 2.0938, "step": 1311 }, { "epoch": 0.27, "learning_rate": 0.0008606274945180279, "loss": 2.2041, "step": 1312 }, { "epoch": 0.27, "learning_rate": 0.0008603997449377152, "loss": 2.0566, "step": 1313 }, { "epoch": 0.27, "learning_rate": 0.0008601718396138262, "loss": 2.2012, "step": 1314 }, { "epoch": 0.27, "learning_rate": 0.0008599437786448479, "loss": 2.1299, "step": 1315 }, { "epoch": 0.27, "learning_rate": 0.0008597155621293349, "loss": 2.1777, "step": 1316 }, { "epoch": 0.27, "learning_rate": 0.0008594871901659089, "loss": 2.2051, "step": 1317 }, { "epoch": 0.27, "learning_rate": 0.0008592586628532588, "loss": 2.0771, "step": 1318 }, { "epoch": 0.27, "learning_rate": 0.0008590299802901411, "loss": 2.1953, "step": 1319 }, { "epoch": 0.27, "learning_rate": 0.0008588011425753783, "loss": 2.0117, "step": 1320 }, { "epoch": 0.27, "learning_rate": 0.0008585721498078611, "loss": 2.1562, "step": 1321 }, { "epoch": 0.27, "learning_rate": 0.0008583430020865464, "loss": 2.0488, "step": 1322 }, { "epoch": 0.27, "learning_rate": 0.0008581136995104585, "loss": 2.1348, "step": 1323 }, { "epoch": 0.27, "learning_rate": 0.0008578842421786887, "loss": 2.1426, "step": 1324 }, { "epoch": 0.27, "learning_rate": 0.0008576546301903947, "loss": 2.084, "step": 1325 }, { "epoch": 0.27, "learning_rate": 0.0008574248636448013, "loss": 2.1709, "step": 1326 }, { "epoch": 0.27, "learning_rate": 0.0008571949426412002, "loss": 2.123, "step": 1327 }, { "epoch": 0.27, "learning_rate": 0.0008569648672789496, "loss": 2.2002, "step": 1328 }, { "epoch": 0.27, "learning_rate": 0.0008567346376574746, "loss": 2.0967, "step": 1329 }, { "epoch": 0.27, "learning_rate": 0.0008565042538762669, "loss": 2.1035, "step": 1330 }, { "epoch": 0.27, "learning_rate": 0.000856273716034885, "loss": 2.1855, "step": 1331 }, { "epoch": 0.27, "learning_rate": 0.0008560430242329536, "loss": 2.0771, "step": 1332 }, { "epoch": 0.27, "learning_rate": 0.0008558121785701643, "loss": 2.1201, "step": 1333 }, { "epoch": 0.27, "learning_rate": 0.0008555811791462746, "loss": 2.1895, "step": 1334 }, { "epoch": 0.27, "learning_rate": 0.0008553500260611094, "loss": 2.1816, "step": 1335 }, { "epoch": 0.27, "learning_rate": 0.000855118719414559, "loss": 2.0947, "step": 1336 }, { "epoch": 0.27, "learning_rate": 0.0008548872593065811, "loss": 2.1855, "step": 1337 }, { "epoch": 0.27, "learning_rate": 0.0008546556458371987, "loss": 2.1094, "step": 1338 }, { "epoch": 0.27, "learning_rate": 0.0008544238791065016, "loss": 2.1738, "step": 1339 }, { "epoch": 0.27, "learning_rate": 0.0008541919592146458, "loss": 2.123, "step": 1340 }, { "epoch": 0.27, "learning_rate": 0.0008539598862618533, "loss": 2.1582, "step": 1341 }, { "epoch": 0.27, "learning_rate": 0.0008537276603484128, "loss": 2.1104, "step": 1342 }, { "epoch": 0.27, "learning_rate": 0.0008534952815746782, "loss": 2.0547, "step": 1343 }, { "epoch": 0.27, "learning_rate": 0.0008532627500410702, "loss": 2.1348, "step": 1344 }, { "epoch": 0.27, "learning_rate": 0.0008530300658480752, "loss": 2.1084, "step": 1345 }, { "epoch": 0.27, "learning_rate": 0.0008527972290962455, "loss": 2.083, "step": 1346 }, { "epoch": 0.27, "learning_rate": 0.0008525642398861997, "loss": 2.1377, "step": 1347 }, { "epoch": 0.27, "learning_rate": 0.0008523310983186219, "loss": 2.1064, "step": 1348 }, { "epoch": 0.27, "learning_rate": 0.0008520978044942622, "loss": 2.0986, "step": 1349 }, { "epoch": 0.27, "learning_rate": 0.0008518643585139364, "loss": 2.0986, "step": 1350 }, { "epoch": 0.27, "learning_rate": 0.0008516307604785261, "loss": 2.1543, "step": 1351 }, { "epoch": 0.27, "learning_rate": 0.000851397010488979, "loss": 2.1475, "step": 1352 }, { "epoch": 0.27, "learning_rate": 0.0008511631086463074, "loss": 2.1992, "step": 1353 }, { "epoch": 0.27, "learning_rate": 0.0008509290550515907, "loss": 2.1611, "step": 1354 }, { "epoch": 0.28, "learning_rate": 0.0008506948498059726, "loss": 2.0312, "step": 1355 }, { "epoch": 0.28, "learning_rate": 0.0008504604930106628, "loss": 2.1719, "step": 1356 }, { "epoch": 0.28, "learning_rate": 0.0008502259847669371, "loss": 2.0986, "step": 1357 }, { "epoch": 0.28, "learning_rate": 0.0008499913251761353, "loss": 2.0605, "step": 1358 }, { "epoch": 0.28, "learning_rate": 0.0008497565143396643, "loss": 2.125, "step": 1359 }, { "epoch": 0.28, "learning_rate": 0.000849521552358995, "loss": 2.1201, "step": 1360 }, { "epoch": 0.28, "learning_rate": 0.0008492864393356644, "loss": 2.1426, "step": 1361 }, { "epoch": 0.28, "learning_rate": 0.0008490511753712744, "loss": 2.1387, "step": 1362 }, { "epoch": 0.28, "learning_rate": 0.0008488157605674924, "loss": 2.2305, "step": 1363 }, { "epoch": 0.28, "learning_rate": 0.0008485801950260507, "loss": 2.1162, "step": 1364 }, { "epoch": 0.28, "learning_rate": 0.0008483444788487469, "loss": 2.1416, "step": 1365 }, { "epoch": 0.28, "learning_rate": 0.0008481086121374437, "loss": 2.1182, "step": 1366 }, { "epoch": 0.28, "learning_rate": 0.0008478725949940686, "loss": 2.168, "step": 1367 }, { "epoch": 0.28, "learning_rate": 0.0008476364275206149, "loss": 2.1162, "step": 1368 }, { "epoch": 0.28, "learning_rate": 0.0008474001098191398, "loss": 2.1025, "step": 1369 }, { "epoch": 0.28, "learning_rate": 0.0008471636419917659, "loss": 2.1602, "step": 1370 }, { "epoch": 0.28, "learning_rate": 0.000846927024140681, "loss": 2.1953, "step": 1371 }, { "epoch": 0.28, "learning_rate": 0.0008466902563681371, "loss": 2.125, "step": 1372 }, { "epoch": 0.28, "learning_rate": 0.0008464533387764517, "loss": 2.0547, "step": 1373 }, { "epoch": 0.28, "learning_rate": 0.0008462162714680062, "loss": 2.1064, "step": 1374 }, { "epoch": 0.28, "learning_rate": 0.0008459790545452475, "loss": 2.1094, "step": 1375 }, { "epoch": 0.28, "learning_rate": 0.0008457416881106866, "loss": 2.123, "step": 1376 }, { "epoch": 0.28, "learning_rate": 0.0008455041722668994, "loss": 2.2158, "step": 1377 }, { "epoch": 0.28, "learning_rate": 0.0008452665071165262, "loss": 2.1094, "step": 1378 }, { "epoch": 0.28, "learning_rate": 0.0008450286927622721, "loss": 2.1348, "step": 1379 }, { "epoch": 0.28, "learning_rate": 0.0008447907293069063, "loss": 1.9922, "step": 1380 }, { "epoch": 0.28, "learning_rate": 0.0008445526168532626, "loss": 2.1025, "step": 1381 }, { "epoch": 0.28, "learning_rate": 0.0008443143555042393, "loss": 2.1768, "step": 1382 }, { "epoch": 0.28, "learning_rate": 0.0008440759453627989, "loss": 2.1621, "step": 1383 }, { "epoch": 0.28, "learning_rate": 0.0008438373865319683, "loss": 2.0693, "step": 1384 }, { "epoch": 0.28, "learning_rate": 0.0008435986791148385, "loss": 2.2158, "step": 1385 }, { "epoch": 0.28, "learning_rate": 0.0008433598232145647, "loss": 2.0928, "step": 1386 }, { "epoch": 0.28, "learning_rate": 0.0008431208189343669, "loss": 2.1621, "step": 1387 }, { "epoch": 0.28, "learning_rate": 0.0008428816663775281, "loss": 2.1211, "step": 1388 }, { "epoch": 0.28, "learning_rate": 0.000842642365647396, "loss": 2.1152, "step": 1389 }, { "epoch": 0.28, "learning_rate": 0.0008424029168473828, "loss": 2.1582, "step": 1390 }, { "epoch": 0.28, "learning_rate": 0.0008421633200809637, "loss": 2.2158, "step": 1391 }, { "epoch": 0.28, "learning_rate": 0.0008419235754516788, "loss": 2.0908, "step": 1392 }, { "epoch": 0.28, "learning_rate": 0.0008416836830631311, "loss": 2.1152, "step": 1393 }, { "epoch": 0.28, "learning_rate": 0.0008414436430189882, "loss": 2.1045, "step": 1394 }, { "epoch": 0.28, "learning_rate": 0.0008412034554229814, "loss": 2.1582, "step": 1395 }, { "epoch": 0.28, "learning_rate": 0.0008409631203789056, "loss": 2.1357, "step": 1396 }, { "epoch": 0.28, "learning_rate": 0.0008407226379906197, "loss": 2.1855, "step": 1397 }, { "epoch": 0.28, "learning_rate": 0.0008404820083620455, "loss": 2.1035, "step": 1398 }, { "epoch": 0.28, "learning_rate": 0.0008402412315971692, "loss": 2.1777, "step": 1399 }, { "epoch": 0.28, "learning_rate": 0.0008400003078000407, "loss": 2.1738, "step": 1400 }, { "epoch": 0.28, "learning_rate": 0.0008397592370747726, "loss": 2.0889, "step": 1401 }, { "epoch": 0.28, "learning_rate": 0.0008395180195255419, "loss": 2.127, "step": 1402 }, { "epoch": 0.28, "learning_rate": 0.0008392766552565885, "loss": 2.1221, "step": 1403 }, { "epoch": 0.28, "learning_rate": 0.0008390351443722157, "loss": 2.1699, "step": 1404 }, { "epoch": 0.29, "learning_rate": 0.0008387934869767904, "loss": 2.0342, "step": 1405 }, { "epoch": 0.29, "learning_rate": 0.0008385516831747429, "loss": 2.0869, "step": 1406 }, { "epoch": 0.29, "learning_rate": 0.0008383097330705662, "loss": 2.2363, "step": 1407 }, { "epoch": 0.29, "learning_rate": 0.0008380676367688171, "loss": 2.1465, "step": 1408 }, { "epoch": 0.29, "learning_rate": 0.0008378253943741157, "loss": 2.1035, "step": 1409 }, { "epoch": 0.29, "learning_rate": 0.0008375830059911442, "loss": 2.127, "step": 1410 }, { "epoch": 0.29, "learning_rate": 0.0008373404717246494, "loss": 2.1865, "step": 1411 }, { "epoch": 0.29, "learning_rate": 0.0008370977916794397, "loss": 2.2129, "step": 1412 }, { "epoch": 0.29, "learning_rate": 0.0008368549659603876, "loss": 2.0654, "step": 1413 }, { "epoch": 0.29, "learning_rate": 0.0008366119946724279, "loss": 2.1357, "step": 1414 }, { "epoch": 0.29, "learning_rate": 0.0008363688779205585, "loss": 2.0469, "step": 1415 }, { "epoch": 0.29, "learning_rate": 0.0008361256158098401, "loss": 2.1543, "step": 1416 }, { "epoch": 0.29, "learning_rate": 0.0008358822084453964, "loss": 2.1631, "step": 1417 }, { "epoch": 0.29, "learning_rate": 0.0008356386559324137, "loss": 2.123, "step": 1418 }, { "epoch": 0.29, "learning_rate": 0.0008353949583761411, "loss": 2.1572, "step": 1419 }, { "epoch": 0.29, "learning_rate": 0.0008351511158818904, "loss": 2.168, "step": 1420 }, { "epoch": 0.29, "learning_rate": 0.0008349071285550358, "loss": 2.1367, "step": 1421 }, { "epoch": 0.29, "learning_rate": 0.0008346629965010146, "loss": 2.1621, "step": 1422 }, { "epoch": 0.29, "learning_rate": 0.0008344187198253259, "loss": 2.1719, "step": 1423 }, { "epoch": 0.29, "learning_rate": 0.0008341742986335321, "loss": 2.084, "step": 1424 }, { "epoch": 0.29, "learning_rate": 0.0008339297330312574, "loss": 2.167, "step": 1425 }, { "epoch": 0.29, "learning_rate": 0.0008336850231241888, "loss": 2.1396, "step": 1426 }, { "epoch": 0.29, "learning_rate": 0.0008334401690180755, "loss": 2.2148, "step": 1427 }, { "epoch": 0.29, "learning_rate": 0.0008331951708187291, "loss": 2.2051, "step": 1428 }, { "epoch": 0.29, "learning_rate": 0.0008329500286320233, "loss": 2.0791, "step": 1429 }, { "epoch": 0.29, "learning_rate": 0.0008327047425638942, "loss": 2.1777, "step": 1430 }, { "epoch": 0.29, "learning_rate": 0.00083245931272034, "loss": 2.1836, "step": 1431 }, { "epoch": 0.29, "learning_rate": 0.0008322137392074211, "loss": 2.0967, "step": 1432 }, { "epoch": 0.29, "learning_rate": 0.00083196802213126, "loss": 2.0088, "step": 1433 }, { "epoch": 0.29, "learning_rate": 0.000831722161598041, "loss": 2.1094, "step": 1434 }, { "epoch": 0.29, "learning_rate": 0.0008314761577140105, "loss": 2.1172, "step": 1435 }, { "epoch": 0.29, "learning_rate": 0.0008312300105854771, "loss": 2.0596, "step": 1436 }, { "epoch": 0.29, "learning_rate": 0.0008309837203188111, "loss": 2.125, "step": 1437 }, { "epoch": 0.29, "learning_rate": 0.0008307372870204446, "loss": 2.1777, "step": 1438 }, { "epoch": 0.29, "learning_rate": 0.0008304907107968716, "loss": 2.1582, "step": 1439 }, { "epoch": 0.29, "learning_rate": 0.0008302439917546478, "loss": 2.1025, "step": 1440 }, { "epoch": 0.29, "learning_rate": 0.0008299971300003908, "loss": 2.1211, "step": 1441 }, { "epoch": 0.29, "learning_rate": 0.0008297501256407795, "loss": 2.1709, "step": 1442 }, { "epoch": 0.29, "learning_rate": 0.000829502978782555, "loss": 2.0312, "step": 1443 }, { "epoch": 0.29, "learning_rate": 0.0008292556895325194, "loss": 2.0732, "step": 1444 }, { "epoch": 0.29, "learning_rate": 0.0008290082579975364, "loss": 2.1152, "step": 1445 }, { "epoch": 0.29, "learning_rate": 0.0008287606842845319, "loss": 2.126, "step": 1446 }, { "epoch": 0.29, "learning_rate": 0.0008285129685004922, "loss": 2.0977, "step": 1447 }, { "epoch": 0.29, "learning_rate": 0.0008282651107524657, "loss": 2.124, "step": 1448 }, { "epoch": 0.29, "learning_rate": 0.0008280171111475619, "loss": 2.0928, "step": 1449 }, { "epoch": 0.29, "learning_rate": 0.0008277689697929516, "loss": 2.1611, "step": 1450 }, { "epoch": 0.29, "learning_rate": 0.0008275206867958671, "loss": 2.1689, "step": 1451 }, { "epoch": 0.29, "learning_rate": 0.0008272722622636015, "loss": 2.1924, "step": 1452 }, { "epoch": 0.29, "learning_rate": 0.0008270236963035092, "loss": 2.1934, "step": 1453 }, { "epoch": 0.3, "learning_rate": 0.000826774989023006, "loss": 2.2363, "step": 1454 }, { "epoch": 0.3, "learning_rate": 0.0008265261405295685, "loss": 2.1387, "step": 1455 }, { "epoch": 0.3, "learning_rate": 0.0008262771509307345, "loss": 2.1211, "step": 1456 }, { "epoch": 0.3, "learning_rate": 0.0008260280203341026, "loss": 2.1064, "step": 1457 }, { "epoch": 0.3, "learning_rate": 0.0008257787488473321, "loss": 2.1387, "step": 1458 }, { "epoch": 0.3, "learning_rate": 0.000825529336578144, "loss": 2.125, "step": 1459 }, { "epoch": 0.3, "learning_rate": 0.000825279783634319, "loss": 2.0811, "step": 1460 }, { "epoch": 0.3, "learning_rate": 0.0008250300901236998, "loss": 2.1016, "step": 1461 }, { "epoch": 0.3, "learning_rate": 0.0008247802561541889, "loss": 2.0791, "step": 1462 }, { "epoch": 0.3, "learning_rate": 0.00082453028183375, "loss": 2.1484, "step": 1463 }, { "epoch": 0.3, "learning_rate": 0.0008242801672704072, "loss": 2.0723, "step": 1464 }, { "epoch": 0.3, "learning_rate": 0.0008240299125722454, "loss": 2.0811, "step": 1465 }, { "epoch": 0.3, "learning_rate": 0.0008237795178474098, "loss": 2.125, "step": 1466 }, { "epoch": 0.3, "learning_rate": 0.0008235289832041065, "loss": 2.1982, "step": 1467 }, { "epoch": 0.3, "learning_rate": 0.0008232783087506017, "loss": 2.124, "step": 1468 }, { "epoch": 0.3, "learning_rate": 0.000823027494595222, "loss": 2.1465, "step": 1469 }, { "epoch": 0.3, "learning_rate": 0.000822776540846355, "loss": 2.1309, "step": 1470 }, { "epoch": 0.3, "learning_rate": 0.0008225254476124477, "loss": 2.1035, "step": 1471 }, { "epoch": 0.3, "learning_rate": 0.0008222742150020082, "loss": 2.082, "step": 1472 }, { "epoch": 0.3, "learning_rate": 0.0008220228431236041, "loss": 2.1025, "step": 1473 }, { "epoch": 0.3, "learning_rate": 0.0008217713320858637, "loss": 2.1553, "step": 1474 }, { "epoch": 0.3, "learning_rate": 0.0008215196819974754, "loss": 2.1318, "step": 1475 }, { "epoch": 0.3, "learning_rate": 0.0008212678929671873, "loss": 2.1348, "step": 1476 }, { "epoch": 0.3, "learning_rate": 0.0008210159651038081, "loss": 2.0732, "step": 1477 }, { "epoch": 0.3, "learning_rate": 0.0008207638985162061, "loss": 2.1084, "step": 1478 }, { "epoch": 0.3, "learning_rate": 0.0008205116933133094, "loss": 2.1113, "step": 1479 }, { "epoch": 0.3, "learning_rate": 0.0008202593496041067, "loss": 2.1191, "step": 1480 }, { "epoch": 0.3, "learning_rate": 0.0008200068674976458, "loss": 2.1289, "step": 1481 }, { "epoch": 0.3, "learning_rate": 0.0008197542471030348, "loss": 2.0566, "step": 1482 }, { "epoch": 0.3, "learning_rate": 0.0008195014885294414, "loss": 2.0684, "step": 1483 }, { "epoch": 0.3, "learning_rate": 0.0008192485918860927, "loss": 2.2266, "step": 1484 }, { "epoch": 0.3, "learning_rate": 0.0008189955572822761, "loss": 2.168, "step": 1485 }, { "epoch": 0.3, "learning_rate": 0.0008187423848273379, "loss": 2.0635, "step": 1486 }, { "epoch": 0.3, "learning_rate": 0.0008184890746306848, "loss": 2.1367, "step": 1487 }, { "epoch": 0.3, "learning_rate": 0.0008182356268017822, "loss": 2.0635, "step": 1488 }, { "epoch": 0.3, "learning_rate": 0.0008179820414501556, "loss": 2.0996, "step": 1489 }, { "epoch": 0.3, "learning_rate": 0.0008177283186853895, "loss": 2.0957, "step": 1490 }, { "epoch": 0.3, "learning_rate": 0.0008174744586171281, "loss": 2.0537, "step": 1491 }, { "epoch": 0.3, "learning_rate": 0.0008172204613550747, "loss": 2.166, "step": 1492 }, { "epoch": 0.3, "learning_rate": 0.000816966327008992, "loss": 2.1406, "step": 1493 }, { "epoch": 0.3, "learning_rate": 0.0008167120556887018, "loss": 2.1367, "step": 1494 }, { "epoch": 0.3, "learning_rate": 0.0008164576475040854, "loss": 2.0898, "step": 1495 }, { "epoch": 0.3, "learning_rate": 0.0008162031025650831, "loss": 2.1514, "step": 1496 }, { "epoch": 0.3, "learning_rate": 0.0008159484209816941, "loss": 2.0439, "step": 1497 }, { "epoch": 0.3, "learning_rate": 0.0008156936028639768, "loss": 2.0625, "step": 1498 }, { "epoch": 0.3, "learning_rate": 0.0008154386483220486, "loss": 2.0918, "step": 1499 }, { "epoch": 0.3, "learning_rate": 0.0008151835574660862, "loss": 2.1318, "step": 1500 }, { "epoch": 0.3, "learning_rate": 0.0008149283304063243, "loss": 2.0635, "step": 1501 }, { "epoch": 0.3, "learning_rate": 0.0008146729672530574, "loss": 2.165, "step": 1502 }, { "epoch": 0.31, "learning_rate": 0.0008144174681166383, "loss": 2.1133, "step": 1503 }, { "epoch": 0.31, "learning_rate": 0.0008141618331074788, "loss": 2.0732, "step": 1504 }, { "epoch": 0.31, "learning_rate": 0.0008139060623360494, "loss": 2.0723, "step": 1505 }, { "epoch": 0.31, "learning_rate": 0.0008136501559128788, "loss": 2.0342, "step": 1506 }, { "epoch": 0.31, "learning_rate": 0.0008133941139485551, "loss": 2.0957, "step": 1507 }, { "epoch": 0.31, "learning_rate": 0.0008131379365537245, "loss": 2.1494, "step": 1508 }, { "epoch": 0.31, "learning_rate": 0.0008128816238390916, "loss": 2.1201, "step": 1509 }, { "epoch": 0.31, "learning_rate": 0.0008126251759154199, "loss": 2.0811, "step": 1510 }, { "epoch": 0.31, "learning_rate": 0.0008123685928935312, "loss": 2.0498, "step": 1511 }, { "epoch": 0.31, "learning_rate": 0.0008121118748843052, "loss": 2.1074, "step": 1512 }, { "epoch": 0.31, "learning_rate": 0.0008118550219986807, "loss": 2.0488, "step": 1513 }, { "epoch": 0.31, "learning_rate": 0.0008115980343476542, "loss": 2.0947, "step": 1514 }, { "epoch": 0.31, "learning_rate": 0.0008113409120422808, "loss": 2.1367, "step": 1515 }, { "epoch": 0.31, "learning_rate": 0.0008110836551936735, "loss": 2.1436, "step": 1516 }, { "epoch": 0.31, "learning_rate": 0.0008108262639130037, "loss": 2.1221, "step": 1517 }, { "epoch": 0.31, "learning_rate": 0.0008105687383115008, "loss": 2.1787, "step": 1518 }, { "epoch": 0.31, "learning_rate": 0.000810311078500452, "loss": 2.0508, "step": 1519 }, { "epoch": 0.31, "learning_rate": 0.0008100532845912028, "loss": 2.123, "step": 1520 }, { "epoch": 0.31, "learning_rate": 0.0008097953566951568, "loss": 2.0, "step": 1521 }, { "epoch": 0.31, "learning_rate": 0.0008095372949237751, "loss": 2.0605, "step": 1522 }, { "epoch": 0.31, "learning_rate": 0.0008092790993885768, "loss": 2.1484, "step": 1523 }, { "epoch": 0.31, "learning_rate": 0.000809020770201139, "loss": 2.083, "step": 1524 }, { "epoch": 0.31, "learning_rate": 0.000808762307473096, "loss": 2.0918, "step": 1525 }, { "epoch": 0.31, "learning_rate": 0.0008085037113161406, "loss": 2.2012, "step": 1526 }, { "epoch": 0.31, "learning_rate": 0.0008082449818420226, "loss": 2.1006, "step": 1527 }, { "epoch": 0.31, "learning_rate": 0.0008079861191625497, "loss": 2.0674, "step": 1528 }, { "epoch": 0.31, "learning_rate": 0.0008077271233895872, "loss": 2.166, "step": 1529 }, { "epoch": 0.31, "learning_rate": 0.0008074679946350578, "loss": 2.1387, "step": 1530 }, { "epoch": 0.31, "learning_rate": 0.0008072087330109418, "loss": 2.082, "step": 1531 }, { "epoch": 0.31, "learning_rate": 0.0008069493386292768, "loss": 2.1064, "step": 1532 }, { "epoch": 0.31, "learning_rate": 0.0008066898116021576, "loss": 2.1895, "step": 1533 }, { "epoch": 0.31, "learning_rate": 0.0008064301520417367, "loss": 2.1348, "step": 1534 }, { "epoch": 0.31, "learning_rate": 0.0008061703600602238, "loss": 2.0879, "step": 1535 }, { "epoch": 0.31, "learning_rate": 0.0008059104357698854, "loss": 2.1113, "step": 1536 }, { "epoch": 0.31, "learning_rate": 0.0008056503792830458, "loss": 2.085, "step": 1537 }, { "epoch": 0.31, "learning_rate": 0.000805390190712086, "loss": 2.1113, "step": 1538 }, { "epoch": 0.31, "learning_rate": 0.0008051298701694441, "loss": 2.1045, "step": 1539 }, { "epoch": 0.31, "learning_rate": 0.0008048694177676156, "loss": 2.0674, "step": 1540 }, { "epoch": 0.31, "learning_rate": 0.0008046088336191524, "loss": 2.0947, "step": 1541 }, { "epoch": 0.31, "learning_rate": 0.0008043481178366638, "loss": 2.1592, "step": 1542 }, { "epoch": 0.31, "learning_rate": 0.000804087270532816, "loss": 2.1299, "step": 1543 }, { "epoch": 0.31, "learning_rate": 0.0008038262918203314, "loss": 2.0566, "step": 1544 }, { "epoch": 0.31, "learning_rate": 0.0008035651818119902, "loss": 2.1191, "step": 1545 }, { "epoch": 0.31, "learning_rate": 0.0008033039406206282, "loss": 2.1084, "step": 1546 }, { "epoch": 0.31, "learning_rate": 0.0008030425683591391, "loss": 2.1631, "step": 1547 }, { "epoch": 0.31, "learning_rate": 0.0008027810651404721, "loss": 2.1279, "step": 1548 }, { "epoch": 0.31, "learning_rate": 0.0008025194310776338, "loss": 2.0713, "step": 1549 }, { "epoch": 0.31, "learning_rate": 0.0008022576662836871, "loss": 2.1289, "step": 1550 }, { "epoch": 0.31, "learning_rate": 0.0008019957708717512, "loss": 2.1133, "step": 1551 }, { "epoch": 0.31, "learning_rate": 0.000801733744955002, "loss": 2.0938, "step": 1552 }, { "epoch": 0.32, "learning_rate": 0.0008014715886466716, "loss": 2.0752, "step": 1553 }, { "epoch": 0.32, "learning_rate": 0.0008012093020600486, "loss": 2.0908, "step": 1554 }, { "epoch": 0.32, "learning_rate": 0.0008009468853084775, "loss": 2.1309, "step": 1555 }, { "epoch": 0.32, "learning_rate": 0.0008006843385053601, "loss": 2.0596, "step": 1556 }, { "epoch": 0.32, "learning_rate": 0.0008004216617641532, "loss": 2.1475, "step": 1557 }, { "epoch": 0.32, "learning_rate": 0.0008001588551983703, "loss": 2.0781, "step": 1558 }, { "epoch": 0.32, "learning_rate": 0.0007998959189215811, "loss": 2.1504, "step": 1559 }, { "epoch": 0.32, "learning_rate": 0.0007996328530474108, "loss": 2.1309, "step": 1560 }, { "epoch": 0.32, "learning_rate": 0.0007993696576895415, "loss": 2.0029, "step": 1561 }, { "epoch": 0.32, "learning_rate": 0.0007991063329617106, "loss": 2.1348, "step": 1562 }, { "epoch": 0.32, "learning_rate": 0.0007988428789777112, "loss": 2.1797, "step": 1563 }, { "epoch": 0.32, "learning_rate": 0.0007985792958513931, "loss": 2.0928, "step": 1564 }, { "epoch": 0.32, "learning_rate": 0.0007983155836966613, "loss": 2.1494, "step": 1565 }, { "epoch": 0.32, "learning_rate": 0.0007980517426274765, "loss": 2.1094, "step": 1566 }, { "epoch": 0.32, "learning_rate": 0.0007977877727578555, "loss": 2.1523, "step": 1567 }, { "epoch": 0.32, "learning_rate": 0.0007975236742018704, "loss": 2.1787, "step": 1568 }, { "epoch": 0.32, "learning_rate": 0.0007972594470736492, "loss": 2.3047, "step": 1569 }, { "epoch": 0.32, "learning_rate": 0.0007969950914873753, "loss": 2.1016, "step": 1570 }, { "epoch": 0.32, "learning_rate": 0.0007967306075572875, "loss": 2.0703, "step": 1571 }, { "epoch": 0.32, "learning_rate": 0.0007964659953976805, "loss": 2.0742, "step": 1572 }, { "epoch": 0.32, "learning_rate": 0.0007962012551229038, "loss": 2.1689, "step": 1573 }, { "epoch": 0.32, "learning_rate": 0.0007959363868473625, "loss": 2.0645, "step": 1574 }, { "epoch": 0.32, "learning_rate": 0.0007956713906855178, "loss": 2.0664, "step": 1575 }, { "epoch": 0.32, "learning_rate": 0.0007954062667518845, "loss": 2.1709, "step": 1576 }, { "epoch": 0.32, "learning_rate": 0.0007951410151610343, "loss": 2.1484, "step": 1577 }, { "epoch": 0.32, "learning_rate": 0.0007948756360275929, "loss": 2.0615, "step": 1578 }, { "epoch": 0.32, "learning_rate": 0.0007946101294662418, "loss": 2.0762, "step": 1579 }, { "epoch": 0.32, "learning_rate": 0.0007943444955917173, "loss": 2.1797, "step": 1580 }, { "epoch": 0.32, "learning_rate": 0.0007940787345188106, "loss": 2.0518, "step": 1581 }, { "epoch": 0.32, "learning_rate": 0.0007938128463623682, "loss": 2.123, "step": 1582 }, { "epoch": 0.32, "learning_rate": 0.0007935468312372911, "loss": 2.0674, "step": 1583 }, { "epoch": 0.32, "learning_rate": 0.0007932806892585356, "loss": 2.2012, "step": 1584 }, { "epoch": 0.32, "learning_rate": 0.0007930144205411125, "loss": 2.1104, "step": 1585 }, { "epoch": 0.32, "learning_rate": 0.0007927480252000875, "loss": 2.1816, "step": 1586 }, { "epoch": 0.32, "learning_rate": 0.000792481503350581, "loss": 2.1816, "step": 1587 }, { "epoch": 0.32, "learning_rate": 0.0007922148551077682, "loss": 2.0469, "step": 1588 }, { "epoch": 0.32, "learning_rate": 0.0007919480805868784, "loss": 2.0869, "step": 1589 }, { "epoch": 0.32, "learning_rate": 0.0007916811799031963, "loss": 2.0576, "step": 1590 }, { "epoch": 0.32, "learning_rate": 0.0007914141531720607, "loss": 2.1309, "step": 1591 }, { "epoch": 0.32, "learning_rate": 0.0007911470005088643, "loss": 2.0908, "step": 1592 }, { "epoch": 0.32, "learning_rate": 0.0007908797220290553, "loss": 2.1191, "step": 1593 }, { "epoch": 0.32, "learning_rate": 0.0007906123178481357, "loss": 2.1621, "step": 1594 }, { "epoch": 0.32, "learning_rate": 0.0007903447880816617, "loss": 2.1445, "step": 1595 }, { "epoch": 0.32, "learning_rate": 0.0007900771328452438, "loss": 2.1006, "step": 1596 }, { "epoch": 0.32, "learning_rate": 0.0007898093522545471, "loss": 2.0469, "step": 1597 }, { "epoch": 0.32, "learning_rate": 0.0007895414464252906, "loss": 2.1309, "step": 1598 }, { "epoch": 0.32, "learning_rate": 0.0007892734154732474, "loss": 2.0938, "step": 1599 }, { "epoch": 0.32, "learning_rate": 0.0007890052595142446, "loss": 2.1348, "step": 1600 }, { "epoch": 0.32, "learning_rate": 0.0007887369786641637, "loss": 2.1113, "step": 1601 }, { "epoch": 0.33, "learning_rate": 0.0007884685730389397, "loss": 2.0664, "step": 1602 }, { "epoch": 0.33, "learning_rate": 0.0007882000427545617, "loss": 2.1396, "step": 1603 }, { "epoch": 0.33, "learning_rate": 0.0007879313879270729, "loss": 2.1104, "step": 1604 }, { "epoch": 0.33, "learning_rate": 0.00078766260867257, "loss": 2.1328, "step": 1605 }, { "epoch": 0.33, "learning_rate": 0.0007873937051072036, "loss": 2.0332, "step": 1606 }, { "epoch": 0.33, "learning_rate": 0.0007871246773471779, "loss": 2.0508, "step": 1607 }, { "epoch": 0.33, "learning_rate": 0.0007868555255087509, "loss": 2.0996, "step": 1608 }, { "epoch": 0.33, "learning_rate": 0.0007865862497082344, "loss": 2.2383, "step": 1609 }, { "epoch": 0.33, "learning_rate": 0.0007863168500619934, "loss": 2.0479, "step": 1610 }, { "epoch": 0.33, "learning_rate": 0.0007860473266864467, "loss": 2.0674, "step": 1611 }, { "epoch": 0.33, "learning_rate": 0.0007857776796980663, "loss": 2.1143, "step": 1612 }, { "epoch": 0.33, "learning_rate": 0.0007855079092133778, "loss": 2.1396, "step": 1613 }, { "epoch": 0.33, "learning_rate": 0.0007852380153489602, "loss": 2.1182, "step": 1614 }, { "epoch": 0.33, "learning_rate": 0.0007849679982214458, "loss": 2.2148, "step": 1615 }, { "epoch": 0.33, "learning_rate": 0.0007846978579475201, "loss": 2.0547, "step": 1616 }, { "epoch": 0.33, "learning_rate": 0.0007844275946439216, "loss": 2.1914, "step": 1617 }, { "epoch": 0.33, "learning_rate": 0.0007841572084274427, "loss": 2.085, "step": 1618 }, { "epoch": 0.33, "learning_rate": 0.000783886699414928, "loss": 2.168, "step": 1619 }, { "epoch": 0.33, "learning_rate": 0.0007836160677232758, "loss": 2.1445, "step": 1620 }, { "epoch": 0.33, "learning_rate": 0.000783345313469437, "loss": 2.1338, "step": 1621 }, { "epoch": 0.33, "learning_rate": 0.0007830744367704159, "loss": 1.9678, "step": 1622 }, { "epoch": 0.33, "learning_rate": 0.0007828034377432694, "loss": 2.0273, "step": 1623 }, { "epoch": 0.33, "learning_rate": 0.0007825323165051072, "loss": 2.0459, "step": 1624 }, { "epoch": 0.33, "learning_rate": 0.0007822610731730922, "loss": 2.1182, "step": 1625 }, { "epoch": 0.33, "learning_rate": 0.0007819897078644396, "loss": 2.167, "step": 1626 }, { "epoch": 0.33, "learning_rate": 0.0007817182206964177, "loss": 2.1445, "step": 1627 }, { "epoch": 0.33, "learning_rate": 0.0007814466117863472, "loss": 2.0576, "step": 1628 }, { "epoch": 0.33, "learning_rate": 0.0007811748812516012, "loss": 2.124, "step": 1629 }, { "epoch": 0.33, "learning_rate": 0.0007809030292096064, "loss": 2.1338, "step": 1630 }, { "epoch": 0.33, "learning_rate": 0.0007806310557778406, "loss": 2.1562, "step": 1631 }, { "epoch": 0.33, "learning_rate": 0.0007803589610738351, "loss": 2.0693, "step": 1632 }, { "epoch": 0.33, "learning_rate": 0.000780086745215173, "loss": 2.0889, "step": 1633 }, { "epoch": 0.33, "learning_rate": 0.0007798144083194903, "loss": 2.1318, "step": 1634 }, { "epoch": 0.33, "learning_rate": 0.0007795419505044745, "loss": 2.123, "step": 1635 }, { "epoch": 0.33, "learning_rate": 0.0007792693718878662, "loss": 2.1162, "step": 1636 }, { "epoch": 0.33, "learning_rate": 0.0007789966725874577, "loss": 2.1279, "step": 1637 }, { "epoch": 0.33, "learning_rate": 0.0007787238527210937, "loss": 2.1006, "step": 1638 }, { "epoch": 0.33, "learning_rate": 0.000778450912406671, "loss": 2.0889, "step": 1639 }, { "epoch": 0.33, "learning_rate": 0.0007781778517621379, "loss": 2.082, "step": 1640 }, { "epoch": 0.33, "learning_rate": 0.0007779046709054958, "loss": 2.1289, "step": 1641 }, { "epoch": 0.33, "learning_rate": 0.000777631369954797, "loss": 2.1113, "step": 1642 }, { "epoch": 0.33, "learning_rate": 0.0007773579490281459, "loss": 2.1426, "step": 1643 }, { "epoch": 0.33, "learning_rate": 0.0007770844082436996, "loss": 2.1094, "step": 1644 }, { "epoch": 0.33, "learning_rate": 0.0007768107477196658, "loss": 2.1133, "step": 1645 }, { "epoch": 0.33, "learning_rate": 0.0007765369675743046, "loss": 2.0654, "step": 1646 }, { "epoch": 0.33, "learning_rate": 0.0007762630679259279, "loss": 2.0791, "step": 1647 }, { "epoch": 0.33, "learning_rate": 0.0007759890488928987, "loss": 2.1025, "step": 1648 }, { "epoch": 0.33, "learning_rate": 0.0007757149105936321, "loss": 2.0957, "step": 1649 }, { "epoch": 0.33, "learning_rate": 0.0007754406531465945, "loss": 2.1465, "step": 1650 }, { "epoch": 0.34, "learning_rate": 0.0007751662766703038, "loss": 2.0859, "step": 1651 }, { "epoch": 0.34, "learning_rate": 0.0007748917812833296, "loss": 2.041, "step": 1652 }, { "epoch": 0.34, "learning_rate": 0.0007746171671042924, "loss": 2.0693, "step": 1653 }, { "epoch": 0.34, "learning_rate": 0.0007743424342518643, "loss": 2.0762, "step": 1654 }, { "epoch": 0.34, "learning_rate": 0.000774067582844769, "loss": 2.0693, "step": 1655 }, { "epoch": 0.34, "learning_rate": 0.0007737926130017808, "loss": 2.0957, "step": 1656 }, { "epoch": 0.34, "learning_rate": 0.0007735175248417255, "loss": 2.084, "step": 1657 }, { "epoch": 0.34, "learning_rate": 0.0007732423184834803, "loss": 1.9834, "step": 1658 }, { "epoch": 0.34, "learning_rate": 0.0007729669940459729, "loss": 2.1357, "step": 1659 }, { "epoch": 0.34, "learning_rate": 0.0007726915516481824, "loss": 2.1328, "step": 1660 }, { "epoch": 0.34, "learning_rate": 0.0007724159914091388, "loss": 2.082, "step": 1661 }, { "epoch": 0.34, "learning_rate": 0.0007721403134479234, "loss": 2.0693, "step": 1662 }, { "epoch": 0.34, "learning_rate": 0.0007718645178836675, "loss": 2.0986, "step": 1663 }, { "epoch": 0.34, "learning_rate": 0.000771588604835554, "loss": 2.0898, "step": 1664 }, { "epoch": 0.34, "learning_rate": 0.000771312574422816, "loss": 2.04, "step": 1665 }, { "epoch": 0.34, "learning_rate": 0.000771036426764738, "loss": 2.0801, "step": 1666 }, { "epoch": 0.34, "learning_rate": 0.0007707601619806547, "loss": 2.0488, "step": 1667 }, { "epoch": 0.34, "learning_rate": 0.0007704837801899512, "loss": 2.0205, "step": 1668 }, { "epoch": 0.34, "learning_rate": 0.0007702072815120637, "loss": 2.1338, "step": 1669 }, { "epoch": 0.34, "learning_rate": 0.0007699306660664787, "loss": 2.0557, "step": 1670 }, { "epoch": 0.34, "learning_rate": 0.0007696539339727332, "loss": 2.1152, "step": 1671 }, { "epoch": 0.34, "learning_rate": 0.0007693770853504143, "loss": 2.1191, "step": 1672 }, { "epoch": 0.34, "learning_rate": 0.0007691001203191597, "loss": 2.0693, "step": 1673 }, { "epoch": 0.34, "learning_rate": 0.0007688230389986575, "loss": 2.0723, "step": 1674 }, { "epoch": 0.34, "learning_rate": 0.0007685458415086459, "loss": 2.1514, "step": 1675 }, { "epoch": 0.34, "learning_rate": 0.0007682685279689134, "loss": 2.1104, "step": 1676 }, { "epoch": 0.34, "learning_rate": 0.0007679910984992988, "loss": 2.0977, "step": 1677 }, { "epoch": 0.34, "learning_rate": 0.0007677135532196904, "loss": 1.9971, "step": 1678 }, { "epoch": 0.34, "learning_rate": 0.0007674358922500273, "loss": 2.1104, "step": 1679 }, { "epoch": 0.34, "learning_rate": 0.000767158115710298, "loss": 2.1299, "step": 1680 }, { "epoch": 0.34, "learning_rate": 0.0007668802237205412, "loss": 2.0566, "step": 1681 }, { "epoch": 0.34, "learning_rate": 0.0007666022164008457, "loss": 2.1494, "step": 1682 }, { "epoch": 0.34, "learning_rate": 0.0007663240938713497, "loss": 2.0352, "step": 1683 }, { "epoch": 0.34, "learning_rate": 0.0007660458562522414, "loss": 2.1094, "step": 1684 }, { "epoch": 0.34, "learning_rate": 0.000765767503663759, "loss": 2.0635, "step": 1685 }, { "epoch": 0.34, "learning_rate": 0.00076548903622619, "loss": 2.0957, "step": 1686 }, { "epoch": 0.34, "learning_rate": 0.0007652104540598712, "loss": 2.0254, "step": 1687 }, { "epoch": 0.34, "learning_rate": 0.0007649317572851901, "loss": 1.9375, "step": 1688 }, { "epoch": 0.34, "learning_rate": 0.0007646529460225827, "loss": 2.0107, "step": 1689 }, { "epoch": 0.34, "learning_rate": 0.0007643740203925352, "loss": 2.1836, "step": 1690 }, { "epoch": 0.34, "learning_rate": 0.000764094980515582, "loss": 2.1152, "step": 1691 }, { "epoch": 0.34, "learning_rate": 0.0007638158265123085, "loss": 2.0732, "step": 1692 }, { "epoch": 0.34, "learning_rate": 0.0007635365585033487, "loss": 2.1387, "step": 1693 }, { "epoch": 0.34, "learning_rate": 0.0007632571766093854, "loss": 2.123, "step": 1694 }, { "epoch": 0.34, "learning_rate": 0.000762977680951151, "loss": 2.0664, "step": 1695 }, { "epoch": 0.34, "learning_rate": 0.0007626980716494274, "loss": 2.0947, "step": 1696 }, { "epoch": 0.34, "learning_rate": 0.0007624183488250451, "loss": 2.0918, "step": 1697 }, { "epoch": 0.34, "learning_rate": 0.0007621385125988839, "loss": 2.1113, "step": 1698 }, { "epoch": 0.34, "learning_rate": 0.0007618585630918726, "loss": 2.1494, "step": 1699 }, { "epoch": 0.35, "learning_rate": 0.0007615785004249888, "loss": 2.1543, "step": 1700 }, { "epoch": 0.35, "learning_rate": 0.0007612983247192593, "loss": 2.1113, "step": 1701 }, { "epoch": 0.35, "learning_rate": 0.0007610180360957594, "loss": 2.0693, "step": 1702 }, { "epoch": 0.35, "learning_rate": 0.0007607376346756137, "loss": 2.0254, "step": 1703 }, { "epoch": 0.35, "learning_rate": 0.0007604571205799946, "loss": 2.0752, "step": 1704 }, { "epoch": 0.35, "learning_rate": 0.0007601764939301241, "loss": 2.0566, "step": 1705 }, { "epoch": 0.35, "learning_rate": 0.0007598957548472728, "loss": 2.0977, "step": 1706 }, { "epoch": 0.35, "learning_rate": 0.0007596149034527593, "loss": 2.1064, "step": 1707 }, { "epoch": 0.35, "learning_rate": 0.0007593339398679511, "loss": 2.0449, "step": 1708 }, { "epoch": 0.35, "learning_rate": 0.0007590528642142642, "loss": 2.0518, "step": 1709 }, { "epoch": 0.35, "learning_rate": 0.000758771676613163, "loss": 2.0654, "step": 1710 }, { "epoch": 0.35, "learning_rate": 0.00075849037718616, "loss": 2.1904, "step": 1711 }, { "epoch": 0.35, "learning_rate": 0.0007582089660548166, "loss": 2.002, "step": 1712 }, { "epoch": 0.35, "learning_rate": 0.0007579274433407422, "loss": 2.2383, "step": 1713 }, { "epoch": 0.35, "learning_rate": 0.000757645809165594, "loss": 2.0811, "step": 1714 }, { "epoch": 0.35, "learning_rate": 0.0007573640636510776, "loss": 2.0898, "step": 1715 }, { "epoch": 0.35, "learning_rate": 0.0007570822069189475, "loss": 2.1406, "step": 1716 }, { "epoch": 0.35, "learning_rate": 0.0007568002390910054, "loss": 2.0625, "step": 1717 }, { "epoch": 0.35, "learning_rate": 0.0007565181602891009, "loss": 2.1104, "step": 1718 }, { "epoch": 0.35, "learning_rate": 0.0007562359706351318, "loss": 2.0557, "step": 1719 }, { "epoch": 0.35, "learning_rate": 0.0007559536702510443, "loss": 2.2188, "step": 1720 }, { "epoch": 0.35, "learning_rate": 0.0007556712592588318, "loss": 2.0957, "step": 1721 }, { "epoch": 0.35, "learning_rate": 0.0007553887377805358, "loss": 2.0781, "step": 1722 }, { "epoch": 0.35, "learning_rate": 0.0007551061059382454, "loss": 2.0752, "step": 1723 }, { "epoch": 0.35, "learning_rate": 0.0007548233638540975, "loss": 2.0527, "step": 1724 }, { "epoch": 0.35, "learning_rate": 0.0007545405116502767, "loss": 1.96, "step": 1725 }, { "epoch": 0.35, "learning_rate": 0.0007542575494490148, "loss": 2.0166, "step": 1726 }, { "epoch": 0.35, "learning_rate": 0.0007539744773725916, "loss": 2.0566, "step": 1727 }, { "epoch": 0.35, "learning_rate": 0.0007536912955433344, "loss": 2.0723, "step": 1728 }, { "epoch": 0.35, "learning_rate": 0.0007534080040836173, "loss": 2.1016, "step": 1729 }, { "epoch": 0.35, "learning_rate": 0.0007531246031158627, "loss": 2.0791, "step": 1730 }, { "epoch": 0.35, "learning_rate": 0.0007528410927625396, "loss": 2.0488, "step": 1731 }, { "epoch": 0.35, "learning_rate": 0.0007525574731461642, "loss": 2.0889, "step": 1732 }, { "epoch": 0.35, "learning_rate": 0.0007522737443893007, "loss": 2.0547, "step": 1733 }, { "epoch": 0.35, "learning_rate": 0.0007519899066145597, "loss": 2.0879, "step": 1734 }, { "epoch": 0.35, "learning_rate": 0.0007517059599445995, "loss": 2.0928, "step": 1735 }, { "epoch": 0.35, "learning_rate": 0.0007514219045021246, "loss": 2.1699, "step": 1736 }, { "epoch": 0.35, "learning_rate": 0.0007511377404098876, "loss": 2.1309, "step": 1737 }, { "epoch": 0.35, "learning_rate": 0.0007508534677906874, "loss": 2.0469, "step": 1738 }, { "epoch": 0.35, "learning_rate": 0.0007505690867673697, "loss": 2.1045, "step": 1739 }, { "epoch": 0.35, "learning_rate": 0.0007502845974628274, "loss": 2.1006, "step": 1740 }, { "epoch": 0.35, "learning_rate": 0.00075, "loss": 2.0566, "step": 1741 }, { "epoch": 0.35, "learning_rate": 0.0007497152945018739, "loss": 2.0264, "step": 1742 }, { "epoch": 0.35, "learning_rate": 0.0007494304810914819, "loss": 2.0664, "step": 1743 }, { "epoch": 0.35, "learning_rate": 0.0007491455598919037, "loss": 2.1064, "step": 1744 }, { "epoch": 0.35, "learning_rate": 0.0007488605310262655, "loss": 2.1621, "step": 1745 }, { "epoch": 0.35, "learning_rate": 0.0007485753946177401, "loss": 2.1299, "step": 1746 }, { "epoch": 0.35, "learning_rate": 0.0007482901507895463, "loss": 2.0098, "step": 1747 }, { "epoch": 0.35, "learning_rate": 0.0007480047996649502, "loss": 2.0449, "step": 1748 }, { "epoch": 0.35, "learning_rate": 0.0007477193413672637, "loss": 2.0518, "step": 1749 }, { "epoch": 0.36, "learning_rate": 0.0007474337760198447, "loss": 2.0479, "step": 1750 }, { "epoch": 0.36, "learning_rate": 0.0007471481037460981, "loss": 2.1396, "step": 1751 }, { "epoch": 0.36, "learning_rate": 0.0007468623246694746, "loss": 2.0381, "step": 1752 }, { "epoch": 0.36, "learning_rate": 0.0007465764389134711, "loss": 2.0518, "step": 1753 }, { "epoch": 0.36, "learning_rate": 0.0007462904466016305, "loss": 2.0586, "step": 1754 }, { "epoch": 0.36, "learning_rate": 0.000746004347857542, "loss": 2.0938, "step": 1755 }, { "epoch": 0.36, "learning_rate": 0.0007457181428048405, "loss": 2.0615, "step": 1756 }, { "epoch": 0.36, "learning_rate": 0.0007454318315672073, "loss": 2.1436, "step": 1757 }, { "epoch": 0.36, "learning_rate": 0.0007451454142683689, "loss": 2.1289, "step": 1758 }, { "epoch": 0.36, "learning_rate": 0.0007448588910320983, "loss": 2.0459, "step": 1759 }, { "epoch": 0.36, "learning_rate": 0.0007445722619822137, "loss": 2.124, "step": 1760 }, { "epoch": 0.36, "learning_rate": 0.0007442855272425797, "loss": 2.0381, "step": 1761 }, { "epoch": 0.36, "learning_rate": 0.000743998686937106, "loss": 2.1562, "step": 1762 }, { "epoch": 0.36, "learning_rate": 0.0007437117411897481, "loss": 2.1152, "step": 1763 }, { "epoch": 0.36, "learning_rate": 0.0007434246901245069, "loss": 2.1475, "step": 1764 }, { "epoch": 0.36, "learning_rate": 0.0007431375338654295, "loss": 2.123, "step": 1765 }, { "epoch": 0.36, "learning_rate": 0.0007428502725366074, "loss": 2.1025, "step": 1766 }, { "epoch": 0.36, "learning_rate": 0.0007425629062621784, "loss": 2.1201, "step": 1767 }, { "epoch": 0.36, "learning_rate": 0.0007422754351663251, "loss": 2.0996, "step": 1768 }, { "epoch": 0.36, "learning_rate": 0.0007419878593732757, "loss": 2.042, "step": 1769 }, { "epoch": 0.36, "learning_rate": 0.0007417001790073035, "loss": 2.1074, "step": 1770 }, { "epoch": 0.36, "learning_rate": 0.0007414123941927272, "loss": 2.1172, "step": 1771 }, { "epoch": 0.36, "learning_rate": 0.0007411245050539103, "loss": 2.0293, "step": 1772 }, { "epoch": 0.36, "learning_rate": 0.0007408365117152617, "loss": 2.127, "step": 1773 }, { "epoch": 0.36, "learning_rate": 0.000740548414301235, "loss": 2.085, "step": 1774 }, { "epoch": 0.36, "learning_rate": 0.0007402602129363288, "loss": 2.1309, "step": 1775 }, { "epoch": 0.36, "learning_rate": 0.0007399719077450875, "loss": 2.0889, "step": 1776 }, { "epoch": 0.36, "learning_rate": 0.0007396834988520988, "loss": 2.0449, "step": 1777 }, { "epoch": 0.36, "learning_rate": 0.0007393949863819966, "loss": 2.1748, "step": 1778 }, { "epoch": 0.36, "learning_rate": 0.0007391063704594589, "loss": 2.0957, "step": 1779 }, { "epoch": 0.36, "learning_rate": 0.0007388176512092084, "loss": 2.084, "step": 1780 }, { "epoch": 0.36, "learning_rate": 0.0007385288287560128, "loss": 2.0576, "step": 1781 }, { "epoch": 0.36, "learning_rate": 0.0007382399032246841, "loss": 2.1055, "step": 1782 }, { "epoch": 0.36, "learning_rate": 0.0007379508747400787, "loss": 2.0488, "step": 1783 }, { "epoch": 0.36, "learning_rate": 0.0007376617434270981, "loss": 2.0781, "step": 1784 }, { "epoch": 0.36, "learning_rate": 0.0007373725094106875, "loss": 2.1387, "step": 1785 }, { "epoch": 0.36, "learning_rate": 0.0007370831728158371, "loss": 2.0586, "step": 1786 }, { "epoch": 0.36, "learning_rate": 0.000736793733767581, "loss": 2.0557, "step": 1787 }, { "epoch": 0.36, "learning_rate": 0.0007365041923909978, "loss": 2.0342, "step": 1788 }, { "epoch": 0.36, "learning_rate": 0.0007362145488112102, "loss": 2.0898, "step": 1789 }, { "epoch": 0.36, "learning_rate": 0.0007359248031533852, "loss": 2.1289, "step": 1790 }, { "epoch": 0.36, "learning_rate": 0.0007356349555427338, "loss": 2.0635, "step": 1791 }, { "epoch": 0.36, "learning_rate": 0.000735345006104511, "loss": 2.167, "step": 1792 }, { "epoch": 0.36, "learning_rate": 0.000735054954964016, "loss": 2.0732, "step": 1793 }, { "epoch": 0.36, "learning_rate": 0.0007347648022465919, "loss": 2.1084, "step": 1794 }, { "epoch": 0.36, "learning_rate": 0.0007344745480776256, "loss": 2.1504, "step": 1795 }, { "epoch": 0.36, "learning_rate": 0.0007341841925825478, "loss": 2.1904, "step": 1796 }, { "epoch": 0.36, "learning_rate": 0.0007338937358868332, "loss": 1.9873, "step": 1797 }, { "epoch": 0.36, "learning_rate": 0.000733603178116, "loss": 2.0586, "step": 1798 }, { "epoch": 0.37, "learning_rate": 0.0007333125193956101, "loss": 2.0654, "step": 1799 }, { "epoch": 0.37, "learning_rate": 0.0007330217598512695, "loss": 2.1152, "step": 1800 }, { "epoch": 0.37, "learning_rate": 0.000732730899608627, "loss": 2.0352, "step": 1801 }, { "epoch": 0.37, "learning_rate": 0.0007324399387933753, "loss": 2.001, "step": 1802 }, { "epoch": 0.37, "learning_rate": 0.0007321488775312506, "loss": 2.1543, "step": 1803 }, { "epoch": 0.37, "learning_rate": 0.0007318577159480327, "loss": 2.1182, "step": 1804 }, { "epoch": 0.37, "learning_rate": 0.0007315664541695441, "loss": 2.0508, "step": 1805 }, { "epoch": 0.37, "learning_rate": 0.0007312750923216514, "loss": 2.0693, "step": 1806 }, { "epoch": 0.37, "learning_rate": 0.0007309836305302637, "loss": 2.0947, "step": 1807 }, { "epoch": 0.37, "learning_rate": 0.0007306920689213343, "loss": 2.1123, "step": 1808 }, { "epoch": 0.37, "learning_rate": 0.000730400407620858, "loss": 2.0684, "step": 1809 }, { "epoch": 0.37, "learning_rate": 0.0007301086467548744, "loss": 2.0381, "step": 1810 }, { "epoch": 0.37, "learning_rate": 0.0007298167864494652, "loss": 2.1025, "step": 1811 }, { "epoch": 0.37, "learning_rate": 0.0007295248268307553, "loss": 2.0518, "step": 1812 }, { "epoch": 0.37, "learning_rate": 0.0007292327680249122, "loss": 2.085, "step": 1813 }, { "epoch": 0.37, "learning_rate": 0.000728940610158147, "loss": 2.0, "step": 1814 }, { "epoch": 0.37, "learning_rate": 0.0007286483533567127, "loss": 1.9971, "step": 1815 }, { "epoch": 0.37, "learning_rate": 0.0007283559977469059, "loss": 2.1104, "step": 1816 }, { "epoch": 0.37, "learning_rate": 0.0007280635434550652, "loss": 2.0518, "step": 1817 }, { "epoch": 0.37, "learning_rate": 0.0007277709906075722, "loss": 2.0225, "step": 1818 }, { "epoch": 0.37, "learning_rate": 0.0007274783393308513, "loss": 2.0684, "step": 1819 }, { "epoch": 0.37, "learning_rate": 0.0007271855897513687, "loss": 1.9697, "step": 1820 }, { "epoch": 0.37, "learning_rate": 0.000726892741995634, "loss": 2.0303, "step": 1821 }, { "epoch": 0.37, "learning_rate": 0.0007265997961901987, "loss": 2.0547, "step": 1822 }, { "epoch": 0.37, "learning_rate": 0.0007263067524616565, "loss": 2.0576, "step": 1823 }, { "epoch": 0.37, "learning_rate": 0.0007260136109366439, "loss": 2.0068, "step": 1824 }, { "epoch": 0.37, "learning_rate": 0.0007257203717418393, "loss": 2.0332, "step": 1825 }, { "epoch": 0.37, "learning_rate": 0.0007254270350039633, "loss": 2.0518, "step": 1826 }, { "epoch": 0.37, "learning_rate": 0.0007251336008497793, "loss": 2.0615, "step": 1827 }, { "epoch": 0.37, "learning_rate": 0.0007248400694060917, "loss": 2.0742, "step": 1828 }, { "epoch": 0.37, "learning_rate": 0.000724546440799748, "loss": 2.1357, "step": 1829 }, { "epoch": 0.37, "learning_rate": 0.0007242527151576368, "loss": 2.0615, "step": 1830 }, { "epoch": 0.37, "learning_rate": 0.0007239588926066893, "loss": 2.0439, "step": 1831 }, { "epoch": 0.37, "learning_rate": 0.0007236649732738784, "loss": 2.166, "step": 1832 }, { "epoch": 0.37, "learning_rate": 0.0007233709572862187, "loss": 2.0605, "step": 1833 }, { "epoch": 0.37, "learning_rate": 0.0007230768447707663, "loss": 2.1035, "step": 1834 }, { "epoch": 0.37, "learning_rate": 0.0007227826358546199, "loss": 2.1084, "step": 1835 }, { "epoch": 0.37, "learning_rate": 0.0007224883306649188, "loss": 2.0713, "step": 1836 }, { "epoch": 0.37, "learning_rate": 0.0007221939293288444, "loss": 2.1494, "step": 1837 }, { "epoch": 0.37, "learning_rate": 0.0007218994319736201, "loss": 1.9648, "step": 1838 }, { "epoch": 0.37, "learning_rate": 0.0007216048387265099, "loss": 1.9951, "step": 1839 }, { "epoch": 0.37, "learning_rate": 0.00072131014971482, "loss": 2.0791, "step": 1840 }, { "epoch": 0.37, "learning_rate": 0.0007210153650658974, "loss": 2.0635, "step": 1841 }, { "epoch": 0.37, "learning_rate": 0.0007207204849071308, "loss": 2.0254, "step": 1842 }, { "epoch": 0.37, "learning_rate": 0.0007204255093659501, "loss": 2.0264, "step": 1843 }, { "epoch": 0.37, "learning_rate": 0.0007201304385698263, "loss": 2.0703, "step": 1844 }, { "epoch": 0.37, "learning_rate": 0.0007198352726462717, "loss": 2.0283, "step": 1845 }, { "epoch": 0.37, "learning_rate": 0.0007195400117228397, "loss": 2.0342, "step": 1846 }, { "epoch": 0.37, "learning_rate": 0.0007192446559271246, "loss": 2.1357, "step": 1847 }, { "epoch": 0.38, "learning_rate": 0.0007189492053867618, "loss": 1.9854, "step": 1848 }, { "epoch": 0.38, "learning_rate": 0.0007186536602294277, "loss": 2.083, "step": 1849 }, { "epoch": 0.38, "learning_rate": 0.0007183580205828396, "loss": 2.001, "step": 1850 }, { "epoch": 0.38, "learning_rate": 0.0007180622865747555, "loss": 2.0576, "step": 1851 }, { "epoch": 0.38, "learning_rate": 0.0007177664583329742, "loss": 2.1025, "step": 1852 }, { "epoch": 0.38, "learning_rate": 0.0007174705359853355, "loss": 2.1592, "step": 1853 }, { "epoch": 0.38, "learning_rate": 0.0007171745196597193, "loss": 2.0293, "step": 1854 }, { "epoch": 0.38, "learning_rate": 0.0007168784094840467, "loss": 2.0938, "step": 1855 }, { "epoch": 0.38, "learning_rate": 0.0007165822055862791, "loss": 2.1504, "step": 1856 }, { "epoch": 0.38, "learning_rate": 0.0007162859080944182, "loss": 2.0195, "step": 1857 }, { "epoch": 0.38, "learning_rate": 0.0007159895171365065, "loss": 2.0332, "step": 1858 }, { "epoch": 0.38, "learning_rate": 0.0007156930328406268, "loss": 2.1592, "step": 1859 }, { "epoch": 0.38, "learning_rate": 0.000715396455334902, "loss": 2.0986, "step": 1860 }, { "epoch": 0.38, "learning_rate": 0.0007150997847474956, "loss": 2.0615, "step": 1861 }, { "epoch": 0.38, "learning_rate": 0.0007148030212066112, "loss": 2.1289, "step": 1862 }, { "epoch": 0.38, "learning_rate": 0.0007145061648404923, "loss": 2.1758, "step": 1863 }, { "epoch": 0.38, "learning_rate": 0.0007142092157774232, "loss": 2.0459, "step": 1864 }, { "epoch": 0.38, "learning_rate": 0.0007139121741457275, "loss": 2.0635, "step": 1865 }, { "epoch": 0.38, "learning_rate": 0.0007136150400737691, "loss": 2.0273, "step": 1866 }, { "epoch": 0.38, "learning_rate": 0.0007133178136899522, "loss": 2.084, "step": 1867 }, { "epoch": 0.38, "learning_rate": 0.00071302049512272, "loss": 2.1279, "step": 1868 }, { "epoch": 0.38, "learning_rate": 0.0007127230845005568, "loss": 1.9531, "step": 1869 }, { "epoch": 0.38, "learning_rate": 0.0007124255819519854, "loss": 2.0361, "step": 1870 }, { "epoch": 0.38, "learning_rate": 0.0007121279876055693, "loss": 2.041, "step": 1871 }, { "epoch": 0.38, "learning_rate": 0.0007118303015899109, "loss": 2.0449, "step": 1872 }, { "epoch": 0.38, "learning_rate": 0.000711532524033653, "loss": 2.0762, "step": 1873 }, { "epoch": 0.38, "learning_rate": 0.0007112346550654774, "loss": 1.9893, "step": 1874 }, { "epoch": 0.38, "learning_rate": 0.0007109366948141052, "loss": 2.0596, "step": 1875 }, { "epoch": 0.38, "learning_rate": 0.0007106386434082979, "loss": 2.0498, "step": 1876 }, { "epoch": 0.38, "learning_rate": 0.0007103405009768553, "loss": 2.1025, "step": 1877 }, { "epoch": 0.38, "learning_rate": 0.0007100422676486174, "loss": 2.0742, "step": 1878 }, { "epoch": 0.38, "learning_rate": 0.0007097439435524627, "loss": 2.0654, "step": 1879 }, { "epoch": 0.38, "learning_rate": 0.0007094455288173097, "loss": 2.1318, "step": 1880 }, { "epoch": 0.38, "learning_rate": 0.0007091470235721155, "loss": 2.0928, "step": 1881 }, { "epoch": 0.38, "learning_rate": 0.0007088484279458766, "loss": 2.0352, "step": 1882 }, { "epoch": 0.38, "learning_rate": 0.0007085497420676285, "loss": 2.0928, "step": 1883 }, { "epoch": 0.38, "learning_rate": 0.0007082509660664456, "loss": 2.1504, "step": 1884 }, { "epoch": 0.38, "learning_rate": 0.0007079521000714412, "loss": 2.0947, "step": 1885 }, { "epoch": 0.38, "learning_rate": 0.0007076531442117679, "loss": 2.0488, "step": 1886 }, { "epoch": 0.38, "learning_rate": 0.0007073540986166166, "loss": 2.1133, "step": 1887 }, { "epoch": 0.38, "learning_rate": 0.0007070549634152172, "loss": 2.0869, "step": 1888 }, { "epoch": 0.38, "learning_rate": 0.0007067557387368386, "loss": 2.0947, "step": 1889 }, { "epoch": 0.38, "learning_rate": 0.000706456424710788, "loss": 2.042, "step": 1890 }, { "epoch": 0.38, "learning_rate": 0.0007061570214664112, "loss": 2.0234, "step": 1891 }, { "epoch": 0.38, "learning_rate": 0.0007058575291330928, "loss": 2.0615, "step": 1892 }, { "epoch": 0.38, "learning_rate": 0.0007055579478402556, "loss": 2.1006, "step": 1893 }, { "epoch": 0.38, "learning_rate": 0.0007052582777173613, "loss": 2.0518, "step": 1894 }, { "epoch": 0.38, "learning_rate": 0.0007049585188939095, "loss": 2.0801, "step": 1895 }, { "epoch": 0.38, "learning_rate": 0.0007046586714994383, "loss": 2.0889, "step": 1896 }, { "epoch": 0.39, "learning_rate": 0.0007043587356635242, "loss": 2.0762, "step": 1897 }, { "epoch": 0.39, "learning_rate": 0.000704058711515782, "loss": 2.1025, "step": 1898 }, { "epoch": 0.39, "learning_rate": 0.0007037585991858642, "loss": 2.0547, "step": 1899 }, { "epoch": 0.39, "learning_rate": 0.0007034583988034618, "loss": 2.1924, "step": 1900 }, { "epoch": 0.39, "learning_rate": 0.0007031581104983038, "loss": 2.0889, "step": 1901 }, { "epoch": 0.39, "learning_rate": 0.0007028577344001573, "loss": 2.0977, "step": 1902 }, { "epoch": 0.39, "learning_rate": 0.0007025572706388268, "loss": 2.1211, "step": 1903 }, { "epoch": 0.39, "learning_rate": 0.0007022567193441553, "loss": 2.041, "step": 1904 }, { "epoch": 0.39, "learning_rate": 0.0007019560806460235, "loss": 2.0967, "step": 1905 }, { "epoch": 0.39, "learning_rate": 0.0007016553546743495, "loss": 2.0605, "step": 1906 }, { "epoch": 0.39, "learning_rate": 0.0007013545415590896, "loss": 2.1602, "step": 1907 }, { "epoch": 0.39, "learning_rate": 0.0007010536414302375, "loss": 1.9336, "step": 1908 }, { "epoch": 0.39, "learning_rate": 0.0007007526544178243, "loss": 1.9941, "step": 1909 }, { "epoch": 0.39, "learning_rate": 0.0007004515806519193, "loss": 2.0254, "step": 1910 }, { "epoch": 0.39, "learning_rate": 0.0007001504202626285, "loss": 2.0586, "step": 1911 }, { "epoch": 0.39, "learning_rate": 0.000699849173380096, "loss": 2.0889, "step": 1912 }, { "epoch": 0.39, "learning_rate": 0.0006995478401345028, "loss": 2.082, "step": 1913 }, { "epoch": 0.39, "learning_rate": 0.0006992464206560675, "loss": 2.125, "step": 1914 }, { "epoch": 0.39, "learning_rate": 0.000698944915075046, "loss": 2.0879, "step": 1915 }, { "epoch": 0.39, "learning_rate": 0.000698643323521731, "loss": 2.0439, "step": 1916 }, { "epoch": 0.39, "learning_rate": 0.0006983416461264527, "loss": 2.0264, "step": 1917 }, { "epoch": 0.39, "learning_rate": 0.0006980398830195785, "loss": 2.1162, "step": 1918 }, { "epoch": 0.39, "learning_rate": 0.0006977380343315125, "loss": 2.0264, "step": 1919 }, { "epoch": 0.39, "learning_rate": 0.000697436100192696, "loss": 2.0156, "step": 1920 }, { "epoch": 0.39, "learning_rate": 0.0006971340807336072, "loss": 2.0205, "step": 1921 }, { "epoch": 0.39, "learning_rate": 0.0006968319760847613, "loss": 2.0381, "step": 1922 }, { "epoch": 0.39, "learning_rate": 0.0006965297863767097, "loss": 1.9639, "step": 1923 }, { "epoch": 0.39, "learning_rate": 0.0006962275117400415, "loss": 2.0625, "step": 1924 }, { "epoch": 0.39, "learning_rate": 0.0006959251523053819, "loss": 2.127, "step": 1925 }, { "epoch": 0.39, "learning_rate": 0.0006956227082033929, "loss": 2.0625, "step": 1926 }, { "epoch": 0.39, "learning_rate": 0.0006953201795647728, "loss": 2.0996, "step": 1927 }, { "epoch": 0.39, "learning_rate": 0.0006950175665202568, "loss": 2.0781, "step": 1928 }, { "epoch": 0.39, "learning_rate": 0.0006947148692006169, "loss": 2.0713, "step": 1929 }, { "epoch": 0.39, "learning_rate": 0.0006944120877366604, "loss": 2.0605, "step": 1930 }, { "epoch": 0.39, "learning_rate": 0.0006941092222592319, "loss": 1.9629, "step": 1931 }, { "epoch": 0.39, "learning_rate": 0.0006938062728992123, "loss": 2.0664, "step": 1932 }, { "epoch": 0.39, "learning_rate": 0.0006935032397875182, "loss": 1.9453, "step": 1933 }, { "epoch": 0.39, "learning_rate": 0.0006932001230551028, "loss": 2.0332, "step": 1934 }, { "epoch": 0.39, "learning_rate": 0.0006928969228329554, "loss": 2.1221, "step": 1935 }, { "epoch": 0.39, "learning_rate": 0.000692593639252101, "loss": 2.1445, "step": 1936 }, { "epoch": 0.39, "learning_rate": 0.0006922902724436016, "loss": 2.0703, "step": 1937 }, { "epoch": 0.39, "learning_rate": 0.0006919868225385536, "loss": 2.1719, "step": 1938 }, { "epoch": 0.39, "learning_rate": 0.0006916832896680908, "loss": 2.0361, "step": 1939 }, { "epoch": 0.39, "learning_rate": 0.0006913796739633822, "loss": 2.0605, "step": 1940 }, { "epoch": 0.39, "learning_rate": 0.0006910759755556325, "loss": 2.0449, "step": 1941 }, { "epoch": 0.39, "learning_rate": 0.0006907721945760823, "loss": 1.9736, "step": 1942 }, { "epoch": 0.39, "learning_rate": 0.0006904683311560081, "loss": 2.0391, "step": 1943 }, { "epoch": 0.39, "learning_rate": 0.0006901643854267214, "loss": 2.0176, "step": 1944 }, { "epoch": 0.39, "learning_rate": 0.0006898603575195701, "loss": 2.0771, "step": 1945 }, { "epoch": 0.39, "learning_rate": 0.000689556247565937, "loss": 2.0186, "step": 1946 }, { "epoch": 0.4, "learning_rate": 0.0006892520556972403, "loss": 2.0186, "step": 1947 }, { "epoch": 0.4, "learning_rate": 0.0006889477820449342, "loss": 2.1201, "step": 1948 }, { "epoch": 0.4, "learning_rate": 0.0006886434267405078, "loss": 2.0928, "step": 1949 }, { "epoch": 0.4, "learning_rate": 0.0006883389899154856, "loss": 2.1406, "step": 1950 }, { "epoch": 0.4, "learning_rate": 0.0006880344717014271, "loss": 1.9775, "step": 1951 }, { "epoch": 0.4, "learning_rate": 0.000687729872229927, "loss": 2.0918, "step": 1952 }, { "epoch": 0.4, "learning_rate": 0.000687425191632616, "loss": 2.0576, "step": 1953 }, { "epoch": 0.4, "learning_rate": 0.0006871204300411584, "loss": 2.1602, "step": 1954 }, { "epoch": 0.4, "learning_rate": 0.0006868155875872546, "loss": 2.2031, "step": 1955 }, { "epoch": 0.4, "learning_rate": 0.0006865106644026396, "loss": 2.1123, "step": 1956 }, { "epoch": 0.4, "learning_rate": 0.0006862056606190829, "loss": 2.0195, "step": 1957 }, { "epoch": 0.4, "learning_rate": 0.0006859005763683899, "loss": 2.0977, "step": 1958 }, { "epoch": 0.4, "learning_rate": 0.0006855954117823993, "loss": 2.0977, "step": 1959 }, { "epoch": 0.4, "learning_rate": 0.0006852901669929856, "loss": 2.0986, "step": 1960 }, { "epoch": 0.4, "learning_rate": 0.000684984842132058, "loss": 2.1445, "step": 1961 }, { "epoch": 0.4, "learning_rate": 0.0006846794373315593, "loss": 2.0752, "step": 1962 }, { "epoch": 0.4, "learning_rate": 0.000684373952723468, "loss": 2.0508, "step": 1963 }, { "epoch": 0.4, "learning_rate": 0.0006840683884397965, "loss": 2.0791, "step": 1964 }, { "epoch": 0.4, "learning_rate": 0.0006837627446125916, "loss": 2.0693, "step": 1965 }, { "epoch": 0.4, "learning_rate": 0.0006834570213739346, "loss": 2.0264, "step": 1966 }, { "epoch": 0.4, "learning_rate": 0.000683151218855941, "loss": 2.041, "step": 1967 }, { "epoch": 0.4, "learning_rate": 0.000682845337190761, "loss": 2.1426, "step": 1968 }, { "epoch": 0.4, "learning_rate": 0.0006825393765105782, "loss": 2.001, "step": 1969 }, { "epoch": 0.4, "learning_rate": 0.0006822333369476113, "loss": 2.0664, "step": 1970 }, { "epoch": 0.4, "learning_rate": 0.0006819272186341123, "loss": 2.1562, "step": 1971 }, { "epoch": 0.4, "learning_rate": 0.0006816210217023675, "loss": 2.1113, "step": 1972 }, { "epoch": 0.4, "learning_rate": 0.0006813147462846974, "loss": 2.124, "step": 1973 }, { "epoch": 0.4, "learning_rate": 0.0006810083925134561, "loss": 1.9893, "step": 1974 }, { "epoch": 0.4, "learning_rate": 0.0006807019605210319, "loss": 1.9541, "step": 1975 }, { "epoch": 0.4, "learning_rate": 0.0006803954504398462, "loss": 1.9932, "step": 1976 }, { "epoch": 0.4, "learning_rate": 0.0006800888624023553, "loss": 2.0771, "step": 1977 }, { "epoch": 0.4, "learning_rate": 0.000679782196541048, "loss": 2.1074, "step": 1978 }, { "epoch": 0.4, "learning_rate": 0.0006794754529884473, "loss": 2.0146, "step": 1979 }, { "epoch": 0.4, "learning_rate": 0.00067916863187711, "loss": 2.0332, "step": 1980 }, { "epoch": 0.4, "learning_rate": 0.000678861733339626, "loss": 2.0576, "step": 1981 }, { "epoch": 0.4, "learning_rate": 0.0006785547575086187, "loss": 2.0312, "step": 1982 }, { "epoch": 0.4, "learning_rate": 0.0006782477045167451, "loss": 2.0137, "step": 1983 }, { "epoch": 0.4, "learning_rate": 0.0006779405744966955, "loss": 2.083, "step": 1984 }, { "epoch": 0.4, "learning_rate": 0.0006776333675811934, "loss": 2.085, "step": 1985 }, { "epoch": 0.4, "learning_rate": 0.0006773260839029955, "loss": 2.0908, "step": 1986 }, { "epoch": 0.4, "learning_rate": 0.0006770187235948916, "loss": 2.0312, "step": 1987 }, { "epoch": 0.4, "learning_rate": 0.0006767112867897051, "loss": 2.082, "step": 1988 }, { "epoch": 0.4, "learning_rate": 0.000676403773620292, "loss": 2.0664, "step": 1989 }, { "epoch": 0.4, "learning_rate": 0.0006760961842195413, "loss": 2.0312, "step": 1990 }, { "epoch": 0.4, "learning_rate": 0.0006757885187203748, "loss": 2.0859, "step": 1991 }, { "epoch": 0.4, "learning_rate": 0.0006754807772557479, "loss": 2.04, "step": 1992 }, { "epoch": 0.4, "learning_rate": 0.0006751729599586483, "loss": 2.0674, "step": 1993 }, { "epoch": 0.4, "learning_rate": 0.0006748650669620964, "loss": 2.0254, "step": 1994 }, { "epoch": 0.4, "learning_rate": 0.0006745570983991454, "loss": 2.0488, "step": 1995 }, { "epoch": 0.41, "learning_rate": 0.0006742490544028814, "loss": 2.1641, "step": 1996 }, { "epoch": 0.41, "learning_rate": 0.0006739409351064226, "loss": 1.9443, "step": 1997 }, { "epoch": 0.41, "learning_rate": 0.0006736327406429202, "loss": 2.0371, "step": 1998 }, { "epoch": 0.41, "learning_rate": 0.000673324471145558, "loss": 2.0449, "step": 1999 }, { "epoch": 0.41, "learning_rate": 0.0006730161267475515, "loss": 2.1338, "step": 2000 }, { "epoch": 0.41, "learning_rate": 0.0006727077075821493, "loss": 1.9824, "step": 2001 }, { "epoch": 0.41, "learning_rate": 0.0006723992137826319, "loss": 2.042, "step": 2002 }, { "epoch": 0.41, "learning_rate": 0.0006720906454823121, "loss": 2.1094, "step": 2003 }, { "epoch": 0.41, "learning_rate": 0.0006717820028145352, "loss": 2.0693, "step": 2004 }, { "epoch": 0.41, "learning_rate": 0.0006714732859126782, "loss": 2.0605, "step": 2005 }, { "epoch": 0.41, "learning_rate": 0.0006711644949101505, "loss": 2.0225, "step": 2006 }, { "epoch": 0.41, "learning_rate": 0.0006708556299403935, "loss": 2.0635, "step": 2007 }, { "epoch": 0.41, "learning_rate": 0.0006705466911368803, "loss": 2.042, "step": 2008 }, { "epoch": 0.41, "learning_rate": 0.0006702376786331164, "loss": 2.043, "step": 2009 }, { "epoch": 0.41, "learning_rate": 0.0006699285925626383, "loss": 2.0576, "step": 2010 }, { "epoch": 0.41, "learning_rate": 0.0006696194330590151, "loss": 2.0928, "step": 2011 }, { "epoch": 0.41, "learning_rate": 0.0006693102002558475, "loss": 2.0303, "step": 2012 }, { "epoch": 0.41, "learning_rate": 0.0006690008942867677, "loss": 2.0332, "step": 2013 }, { "epoch": 0.41, "learning_rate": 0.0006686915152854393, "loss": 2.0342, "step": 2014 }, { "epoch": 0.41, "learning_rate": 0.000668382063385558, "loss": 2.0859, "step": 2015 }, { "epoch": 0.41, "learning_rate": 0.0006680725387208505, "loss": 2.0312, "step": 2016 }, { "epoch": 0.41, "learning_rate": 0.0006677629414250753, "loss": 1.9346, "step": 2017 }, { "epoch": 0.41, "learning_rate": 0.0006674532716320219, "loss": 2.1807, "step": 2018 }, { "epoch": 0.41, "learning_rate": 0.0006671435294755118, "loss": 2.0371, "step": 2019 }, { "epoch": 0.41, "learning_rate": 0.000666833715089397, "loss": 2.0703, "step": 2020 }, { "epoch": 0.41, "learning_rate": 0.0006665238286075612, "loss": 2.1387, "step": 2021 }, { "epoch": 0.41, "learning_rate": 0.0006662138701639191, "loss": 2.04, "step": 2022 }, { "epoch": 0.41, "learning_rate": 0.0006659038398924165, "loss": 2.1367, "step": 2023 }, { "epoch": 0.41, "learning_rate": 0.0006655937379270303, "loss": 2.0293, "step": 2024 }, { "epoch": 0.41, "learning_rate": 0.0006652835644017682, "loss": 2.1064, "step": 2025 }, { "epoch": 0.41, "learning_rate": 0.000664973319450669, "loss": 2.1016, "step": 2026 }, { "epoch": 0.41, "learning_rate": 0.0006646630032078024, "loss": 2.0469, "step": 2027 }, { "epoch": 0.41, "learning_rate": 0.0006643526158072688, "loss": 2.0596, "step": 2028 }, { "epoch": 0.41, "learning_rate": 0.0006640421573831994, "loss": 2.1494, "step": 2029 }, { "epoch": 0.41, "learning_rate": 0.000663731628069756, "loss": 2.0723, "step": 2030 }, { "epoch": 0.41, "learning_rate": 0.0006634210280011312, "loss": 2.0898, "step": 2031 }, { "epoch": 0.41, "learning_rate": 0.000663110357311548, "loss": 2.0918, "step": 2032 }, { "epoch": 0.41, "learning_rate": 0.00066279961613526, "loss": 2.0254, "step": 2033 }, { "epoch": 0.41, "learning_rate": 0.0006624888046065516, "loss": 2.0693, "step": 2034 }, { "epoch": 0.41, "learning_rate": 0.0006621779228597368, "loss": 2.0244, "step": 2035 }, { "epoch": 0.41, "learning_rate": 0.0006618669710291606, "loss": 2.0576, "step": 2036 }, { "epoch": 0.41, "learning_rate": 0.000661555949249198, "loss": 2.1299, "step": 2037 }, { "epoch": 0.41, "learning_rate": 0.0006612448576542544, "loss": 2.1006, "step": 2038 }, { "epoch": 0.41, "learning_rate": 0.0006609336963787655, "loss": 2.043, "step": 2039 }, { "epoch": 0.41, "learning_rate": 0.0006606224655571966, "loss": 2.0625, "step": 2040 }, { "epoch": 0.41, "learning_rate": 0.0006603111653240433, "loss": 2.0566, "step": 2041 }, { "epoch": 0.41, "learning_rate": 0.0006599997958138315, "loss": 2.1348, "step": 2042 }, { "epoch": 0.41, "learning_rate": 0.0006596883571611168, "loss": 2.0859, "step": 2043 }, { "epoch": 0.41, "learning_rate": 0.0006593768495004848, "loss": 2.0352, "step": 2044 }, { "epoch": 0.42, "learning_rate": 0.0006590652729665503, "loss": 2.127, "step": 2045 }, { "epoch": 0.42, "learning_rate": 0.0006587536276939585, "loss": 2.0312, "step": 2046 }, { "epoch": 0.42, "learning_rate": 0.0006584419138173845, "loss": 1.9766, "step": 2047 }, { "epoch": 0.42, "learning_rate": 0.0006581301314715325, "loss": 2.0508, "step": 2048 }, { "epoch": 0.42, "learning_rate": 0.0006578182807911363, "loss": 2.0371, "step": 2049 }, { "epoch": 0.42, "learning_rate": 0.0006575063619109599, "loss": 2.0537, "step": 2050 }, { "epoch": 0.42, "learning_rate": 0.0006571943749657959, "loss": 2.0889, "step": 2051 }, { "epoch": 0.42, "learning_rate": 0.0006568823200904667, "loss": 2.0166, "step": 2052 }, { "epoch": 0.42, "learning_rate": 0.0006565701974198244, "loss": 2.0801, "step": 2053 }, { "epoch": 0.42, "learning_rate": 0.0006562580070887498, "loss": 2.0078, "step": 2054 }, { "epoch": 0.42, "learning_rate": 0.0006559457492321533, "loss": 2.0781, "step": 2055 }, { "epoch": 0.42, "learning_rate": 0.0006556334239849743, "loss": 1.9902, "step": 2056 }, { "epoch": 0.42, "learning_rate": 0.0006553210314821814, "loss": 1.9727, "step": 2057 }, { "epoch": 0.42, "learning_rate": 0.0006550085718587724, "loss": 2.084, "step": 2058 }, { "epoch": 0.42, "learning_rate": 0.0006546960452497738, "loss": 1.9941, "step": 2059 }, { "epoch": 0.42, "learning_rate": 0.0006543834517902415, "loss": 1.9961, "step": 2060 }, { "epoch": 0.42, "learning_rate": 0.0006540707916152598, "loss": 2.0566, "step": 2061 }, { "epoch": 0.42, "learning_rate": 0.0006537580648599419, "loss": 2.0879, "step": 2062 }, { "epoch": 0.42, "learning_rate": 0.0006534452716594301, "loss": 2.0918, "step": 2063 }, { "epoch": 0.42, "learning_rate": 0.0006531324121488954, "loss": 2.0381, "step": 2064 }, { "epoch": 0.42, "learning_rate": 0.000652819486463537, "loss": 2.0586, "step": 2065 }, { "epoch": 0.42, "learning_rate": 0.0006525064947385832, "loss": 2.0303, "step": 2066 }, { "epoch": 0.42, "learning_rate": 0.0006521934371092901, "loss": 2.0947, "step": 2067 }, { "epoch": 0.42, "learning_rate": 0.0006518803137109437, "loss": 2.0801, "step": 2068 }, { "epoch": 0.42, "learning_rate": 0.0006515671246788567, "loss": 2.0625, "step": 2069 }, { "epoch": 0.42, "learning_rate": 0.0006512538701483712, "loss": 1.9912, "step": 2070 }, { "epoch": 0.42, "learning_rate": 0.0006509405502548578, "loss": 2.0078, "step": 2071 }, { "epoch": 0.42, "learning_rate": 0.0006506271651337144, "loss": 2.0127, "step": 2072 }, { "epoch": 0.42, "learning_rate": 0.0006503137149203679, "loss": 2.0215, "step": 2073 }, { "epoch": 0.42, "learning_rate": 0.000650000199750273, "loss": 2.1064, "step": 2074 }, { "epoch": 0.42, "learning_rate": 0.0006496866197589125, "loss": 2.0947, "step": 2075 }, { "epoch": 0.42, "learning_rate": 0.0006493729750817974, "loss": 2.0674, "step": 2076 }, { "epoch": 0.42, "learning_rate": 0.000649059265854466, "loss": 2.1055, "step": 2077 }, { "epoch": 0.42, "learning_rate": 0.0006487454922124854, "loss": 2.0371, "step": 2078 }, { "epoch": 0.42, "learning_rate": 0.0006484316542914502, "loss": 1.9561, "step": 2079 }, { "epoch": 0.42, "learning_rate": 0.0006481177522269824, "loss": 2.0049, "step": 2080 }, { "epoch": 0.42, "learning_rate": 0.0006478037861547321, "loss": 2.1172, "step": 2081 }, { "epoch": 0.42, "learning_rate": 0.0006474897562103771, "loss": 2.0713, "step": 2082 }, { "epoch": 0.42, "learning_rate": 0.0006471756625296225, "loss": 2.0449, "step": 2083 }, { "epoch": 0.42, "learning_rate": 0.0006468615052482011, "loss": 2.0479, "step": 2084 }, { "epoch": 0.42, "learning_rate": 0.0006465472845018735, "loss": 2.0879, "step": 2085 }, { "epoch": 0.42, "learning_rate": 0.0006462330004264272, "loss": 2.0762, "step": 2086 }, { "epoch": 0.42, "learning_rate": 0.0006459186531576771, "loss": 2.167, "step": 2087 }, { "epoch": 0.42, "learning_rate": 0.0006456042428314661, "loss": 2.0244, "step": 2088 }, { "epoch": 0.42, "learning_rate": 0.0006452897695836635, "loss": 1.9688, "step": 2089 }, { "epoch": 0.42, "learning_rate": 0.0006449752335501662, "loss": 2.0557, "step": 2090 }, { "epoch": 0.42, "learning_rate": 0.0006446606348668982, "loss": 2.0137, "step": 2091 }, { "epoch": 0.42, "learning_rate": 0.0006443459736698105, "loss": 2.1084, "step": 2092 }, { "epoch": 0.42, "learning_rate": 0.0006440312500948814, "loss": 2.0898, "step": 2093 }, { "epoch": 0.43, "learning_rate": 0.0006437164642781155, "loss": 2.0107, "step": 2094 }, { "epoch": 0.43, "learning_rate": 0.0006434016163555452, "loss": 2.0244, "step": 2095 }, { "epoch": 0.43, "learning_rate": 0.0006430867064632289, "loss": 2.0938, "step": 2096 }, { "epoch": 0.43, "learning_rate": 0.0006427717347372523, "loss": 1.9639, "step": 2097 }, { "epoch": 0.43, "learning_rate": 0.0006424567013137276, "loss": 2.1016, "step": 2098 }, { "epoch": 0.43, "learning_rate": 0.0006421416063287937, "loss": 1.9219, "step": 2099 }, { "epoch": 0.43, "learning_rate": 0.0006418264499186162, "loss": 2.0166, "step": 2100 }, { "epoch": 0.43, "learning_rate": 0.000641511232219387, "loss": 2.1084, "step": 2101 }, { "epoch": 0.43, "learning_rate": 0.000641195953367325, "loss": 1.9727, "step": 2102 }, { "epoch": 0.43, "learning_rate": 0.0006408806134986749, "loss": 2.0195, "step": 2103 }, { "epoch": 0.43, "learning_rate": 0.000640565212749708, "loss": 2.0039, "step": 2104 }, { "epoch": 0.43, "learning_rate": 0.000640249751256722, "loss": 2.0352, "step": 2105 }, { "epoch": 0.43, "learning_rate": 0.0006399342291560411, "loss": 2.0576, "step": 2106 }, { "epoch": 0.43, "learning_rate": 0.0006396186465840152, "loss": 2.041, "step": 2107 }, { "epoch": 0.43, "learning_rate": 0.0006393030036770203, "loss": 1.9531, "step": 2108 }, { "epoch": 0.43, "learning_rate": 0.000638987300571459, "loss": 2.0312, "step": 2109 }, { "epoch": 0.43, "learning_rate": 0.0006386715374037595, "loss": 2.0303, "step": 2110 }, { "epoch": 0.43, "learning_rate": 0.0006383557143103762, "loss": 2.1758, "step": 2111 }, { "epoch": 0.43, "learning_rate": 0.0006380398314277889, "loss": 2.0615, "step": 2112 }, { "epoch": 0.43, "learning_rate": 0.000637723888892504, "loss": 2.0049, "step": 2113 }, { "epoch": 0.43, "learning_rate": 0.0006374078868410534, "loss": 2.0635, "step": 2114 }, { "epoch": 0.43, "learning_rate": 0.0006370918254099939, "loss": 2.0605, "step": 2115 }, { "epoch": 0.43, "learning_rate": 0.0006367757047359093, "loss": 2.0273, "step": 2116 }, { "epoch": 0.43, "learning_rate": 0.0006364595249554084, "loss": 2.0703, "step": 2117 }, { "epoch": 0.43, "learning_rate": 0.0006361432862051251, "loss": 2.1406, "step": 2118 }, { "epoch": 0.43, "learning_rate": 0.0006358269886217194, "loss": 2.0664, "step": 2119 }, { "epoch": 0.43, "learning_rate": 0.0006355106323418766, "loss": 1.9521, "step": 2120 }, { "epoch": 0.43, "learning_rate": 0.000635194217502307, "loss": 2.0654, "step": 2121 }, { "epoch": 0.43, "learning_rate": 0.0006348777442397467, "loss": 2.043, "step": 2122 }, { "epoch": 0.43, "learning_rate": 0.000634561212690957, "loss": 2.0283, "step": 2123 }, { "epoch": 0.43, "learning_rate": 0.0006342446229927239, "loss": 2.0928, "step": 2124 }, { "epoch": 0.43, "learning_rate": 0.0006339279752818591, "loss": 2.0771, "step": 2125 }, { "epoch": 0.43, "learning_rate": 0.0006336112696951988, "loss": 2.0312, "step": 2126 }, { "epoch": 0.43, "learning_rate": 0.0006332945063696048, "loss": 2.0264, "step": 2127 }, { "epoch": 0.43, "learning_rate": 0.0006329776854419636, "loss": 2.0322, "step": 2128 }, { "epoch": 0.43, "learning_rate": 0.0006326608070491864, "loss": 2.0967, "step": 2129 }, { "epoch": 0.43, "learning_rate": 0.0006323438713282095, "loss": 2.1104, "step": 2130 }, { "epoch": 0.43, "learning_rate": 0.0006320268784159939, "loss": 2.0752, "step": 2131 }, { "epoch": 0.43, "learning_rate": 0.0006317098284495251, "loss": 2.0693, "step": 2132 }, { "epoch": 0.43, "learning_rate": 0.0006313927215658138, "loss": 2.0908, "step": 2133 }, { "epoch": 0.43, "learning_rate": 0.0006310755579018948, "loss": 2.0508, "step": 2134 }, { "epoch": 0.43, "learning_rate": 0.0006307583375948273, "loss": 2.0117, "step": 2135 }, { "epoch": 0.43, "learning_rate": 0.0006304410607816957, "loss": 2.1191, "step": 2136 }, { "epoch": 0.43, "learning_rate": 0.0006301237275996082, "loss": 2.0615, "step": 2137 }, { "epoch": 0.43, "learning_rate": 0.0006298063381856977, "loss": 2.0254, "step": 2138 }, { "epoch": 0.43, "learning_rate": 0.0006294888926771208, "loss": 2.0605, "step": 2139 }, { "epoch": 0.43, "learning_rate": 0.0006291713912110591, "loss": 2.0713, "step": 2140 }, { "epoch": 0.43, "learning_rate": 0.0006288538339247182, "loss": 2.0488, "step": 2141 }, { "epoch": 0.43, "learning_rate": 0.0006285362209553273, "loss": 1.959, "step": 2142 }, { "epoch": 0.43, "learning_rate": 0.0006282185524401404, "loss": 2.0566, "step": 2143 }, { "epoch": 0.44, "learning_rate": 0.000627900828516435, "loss": 2.0479, "step": 2144 }, { "epoch": 0.44, "learning_rate": 0.0006275830493215127, "loss": 2.0459, "step": 2145 }, { "epoch": 0.44, "learning_rate": 0.0006272652149926989, "loss": 2.0781, "step": 2146 }, { "epoch": 0.44, "learning_rate": 0.000626947325667343, "loss": 1.9443, "step": 2147 }, { "epoch": 0.44, "learning_rate": 0.0006266293814828181, "loss": 2.04, "step": 2148 }, { "epoch": 0.44, "learning_rate": 0.000626311382576521, "loss": 1.9629, "step": 2149 }, { "epoch": 0.44, "learning_rate": 0.0006259933290858719, "loss": 2.0479, "step": 2150 }, { "epoch": 0.44, "learning_rate": 0.0006256752211483151, "loss": 2.0137, "step": 2151 }, { "epoch": 0.44, "learning_rate": 0.0006253570589013182, "loss": 2.0137, "step": 2152 }, { "epoch": 0.44, "learning_rate": 0.0006250388424823719, "loss": 2.0527, "step": 2153 }, { "epoch": 0.44, "learning_rate": 0.0006247205720289907, "loss": 2.0859, "step": 2154 }, { "epoch": 0.44, "learning_rate": 0.0006244022476787124, "loss": 2.0469, "step": 2155 }, { "epoch": 0.44, "learning_rate": 0.0006240838695690983, "loss": 1.9951, "step": 2156 }, { "epoch": 0.44, "learning_rate": 0.0006237654378377324, "loss": 2.0527, "step": 2157 }, { "epoch": 0.44, "learning_rate": 0.0006234469526222223, "loss": 2.0322, "step": 2158 }, { "epoch": 0.44, "learning_rate": 0.0006231284140601985, "loss": 2.0732, "step": 2159 }, { "epoch": 0.44, "learning_rate": 0.0006228098222893148, "loss": 2.0059, "step": 2160 }, { "epoch": 0.44, "learning_rate": 0.0006224911774472476, "loss": 1.9941, "step": 2161 }, { "epoch": 0.44, "learning_rate": 0.0006221724796716966, "loss": 2.0146, "step": 2162 }, { "epoch": 0.44, "learning_rate": 0.0006218537291003842, "loss": 2.1162, "step": 2163 }, { "epoch": 0.44, "learning_rate": 0.0006215349258710558, "loss": 2.0537, "step": 2164 }, { "epoch": 0.44, "learning_rate": 0.0006212160701214793, "loss": 1.9971, "step": 2165 }, { "epoch": 0.44, "learning_rate": 0.0006208971619894452, "loss": 1.9971, "step": 2166 }, { "epoch": 0.44, "learning_rate": 0.0006205782016127672, "loss": 2.0068, "step": 2167 }, { "epoch": 0.44, "learning_rate": 0.0006202591891292809, "loss": 2.0586, "step": 2168 }, { "epoch": 0.44, "learning_rate": 0.0006199401246768449, "loss": 2.0732, "step": 2169 }, { "epoch": 0.44, "learning_rate": 0.0006196210083933403, "loss": 2.0762, "step": 2170 }, { "epoch": 0.44, "learning_rate": 0.0006193018404166698, "loss": 2.0547, "step": 2171 }, { "epoch": 0.44, "learning_rate": 0.0006189826208847596, "loss": 2.0596, "step": 2172 }, { "epoch": 0.44, "learning_rate": 0.0006186633499355575, "loss": 2.0479, "step": 2173 }, { "epoch": 0.44, "learning_rate": 0.0006183440277070334, "loss": 1.9805, "step": 2174 }, { "epoch": 0.44, "learning_rate": 0.0006180246543371797, "loss": 1.9707, "step": 2175 }, { "epoch": 0.44, "learning_rate": 0.0006177052299640109, "loss": 2.0566, "step": 2176 }, { "epoch": 0.44, "learning_rate": 0.0006173857547255633, "loss": 2.0293, "step": 2177 }, { "epoch": 0.44, "learning_rate": 0.0006170662287598955, "loss": 1.9697, "step": 2178 }, { "epoch": 0.44, "learning_rate": 0.0006167466522050875, "loss": 2.082, "step": 2179 }, { "epoch": 0.44, "learning_rate": 0.0006164270251992418, "loss": 1.9033, "step": 2180 }, { "epoch": 0.44, "learning_rate": 0.0006161073478804822, "loss": 2.0801, "step": 2181 }, { "epoch": 0.44, "learning_rate": 0.0006157876203869546, "loss": 2.0635, "step": 2182 }, { "epoch": 0.44, "learning_rate": 0.0006154678428568262, "loss": 2.0439, "step": 2183 }, { "epoch": 0.44, "learning_rate": 0.0006151480154282865, "loss": 2.041, "step": 2184 }, { "epoch": 0.44, "learning_rate": 0.0006148281382395455, "loss": 2.0156, "step": 2185 }, { "epoch": 0.44, "learning_rate": 0.0006145082114288357, "loss": 1.9707, "step": 2186 }, { "epoch": 0.44, "learning_rate": 0.0006141882351344105, "loss": 2.0918, "step": 2187 }, { "epoch": 0.44, "learning_rate": 0.000613868209494545, "loss": 1.9482, "step": 2188 }, { "epoch": 0.44, "learning_rate": 0.0006135481346475352, "loss": 1.9824, "step": 2189 }, { "epoch": 0.44, "learning_rate": 0.000613228010731699, "loss": 2.0508, "step": 2190 }, { "epoch": 0.44, "learning_rate": 0.0006129078378853748, "loss": 2.0449, "step": 2191 }, { "epoch": 0.44, "learning_rate": 0.0006125876162469226, "loss": 2.1182, "step": 2192 }, { "epoch": 0.45, "learning_rate": 0.0006122673459547233, "loss": 1.9883, "step": 2193 }, { "epoch": 0.45, "learning_rate": 0.000611947027147179, "loss": 2.0264, "step": 2194 }, { "epoch": 0.45, "learning_rate": 0.0006116266599627126, "loss": 2.0527, "step": 2195 }, { "epoch": 0.45, "learning_rate": 0.0006113062445397679, "loss": 2.1123, "step": 2196 }, { "epoch": 0.45, "learning_rate": 0.0006109857810168098, "loss": 2.0205, "step": 2197 }, { "epoch": 0.45, "learning_rate": 0.0006106652695323236, "loss": 2.0039, "step": 2198 }, { "epoch": 0.45, "learning_rate": 0.0006103447102248153, "loss": 2.0029, "step": 2199 }, { "epoch": 0.45, "learning_rate": 0.0006100241032328124, "loss": 1.9961, "step": 2200 }, { "epoch": 0.45, "learning_rate": 0.0006097034486948618, "loss": 2.0449, "step": 2201 }, { "epoch": 0.45, "learning_rate": 0.0006093827467495319, "loss": 2.0781, "step": 2202 }, { "epoch": 0.45, "learning_rate": 0.000609061997535411, "loss": 2.0381, "step": 2203 }, { "epoch": 0.45, "learning_rate": 0.0006087412011911083, "loss": 1.9395, "step": 2204 }, { "epoch": 0.45, "learning_rate": 0.0006084203578552528, "loss": 2.0615, "step": 2205 }, { "epoch": 0.45, "learning_rate": 0.0006080994676664943, "loss": 2.0254, "step": 2206 }, { "epoch": 0.45, "learning_rate": 0.0006077785307635026, "loss": 2.04, "step": 2207 }, { "epoch": 0.45, "learning_rate": 0.000607457547284968, "loss": 2.0674, "step": 2208 }, { "epoch": 0.45, "learning_rate": 0.0006071365173696003, "loss": 2.0215, "step": 2209 }, { "epoch": 0.45, "learning_rate": 0.00060681544115613, "loss": 2.043, "step": 2210 }, { "epoch": 0.45, "learning_rate": 0.0006064943187833074, "loss": 2.0605, "step": 2211 }, { "epoch": 0.45, "learning_rate": 0.0006061731503899024, "loss": 1.9619, "step": 2212 }, { "epoch": 0.45, "learning_rate": 0.0006058519361147054, "loss": 2.0576, "step": 2213 }, { "epoch": 0.45, "learning_rate": 0.0006055306760965263, "loss": 2.0137, "step": 2214 }, { "epoch": 0.45, "learning_rate": 0.0006052093704741945, "loss": 2.0449, "step": 2215 }, { "epoch": 0.45, "learning_rate": 0.0006048880193865598, "loss": 1.9971, "step": 2216 }, { "epoch": 0.45, "learning_rate": 0.0006045666229724908, "loss": 1.9434, "step": 2217 }, { "epoch": 0.45, "learning_rate": 0.0006042451813708766, "loss": 2.1191, "step": 2218 }, { "epoch": 0.45, "learning_rate": 0.0006039236947206252, "loss": 1.9434, "step": 2219 }, { "epoch": 0.45, "learning_rate": 0.000603602163160664, "loss": 2.0059, "step": 2220 }, { "epoch": 0.45, "learning_rate": 0.0006032805868299402, "loss": 1.9248, "step": 2221 }, { "epoch": 0.45, "learning_rate": 0.0006029589658674202, "loss": 1.9951, "step": 2222 }, { "epoch": 0.45, "learning_rate": 0.0006026373004120896, "loss": 2.1191, "step": 2223 }, { "epoch": 0.45, "learning_rate": 0.0006023155906029534, "loss": 2.0615, "step": 2224 }, { "epoch": 0.45, "learning_rate": 0.0006019938365790356, "loss": 2.0498, "step": 2225 }, { "epoch": 0.45, "learning_rate": 0.0006016720384793793, "loss": 1.9854, "step": 2226 }, { "epoch": 0.45, "learning_rate": 0.0006013501964430468, "loss": 2.0664, "step": 2227 }, { "epoch": 0.45, "learning_rate": 0.0006010283106091194, "loss": 1.9609, "step": 2228 }, { "epoch": 0.45, "learning_rate": 0.0006007063811166969, "loss": 2.0684, "step": 2229 }, { "epoch": 0.45, "learning_rate": 0.0006003844081048985, "loss": 1.9336, "step": 2230 }, { "epoch": 0.45, "learning_rate": 0.0006000623917128622, "loss": 2.082, "step": 2231 }, { "epoch": 0.45, "learning_rate": 0.0005997403320797443, "loss": 2.0615, "step": 2232 }, { "epoch": 0.45, "learning_rate": 0.00059941822934472, "loss": 1.9434, "step": 2233 }, { "epoch": 0.45, "learning_rate": 0.0005990960836469832, "loss": 2.0029, "step": 2234 }, { "epoch": 0.45, "learning_rate": 0.0005987738951257467, "loss": 2.0674, "step": 2235 }, { "epoch": 0.45, "learning_rate": 0.0005984516639202408, "loss": 2.0361, "step": 2236 }, { "epoch": 0.45, "learning_rate": 0.0005981293901697152, "loss": 1.9824, "step": 2237 }, { "epoch": 0.45, "learning_rate": 0.0005978070740134379, "loss": 1.9854, "step": 2238 }, { "epoch": 0.45, "learning_rate": 0.0005974847155906944, "loss": 2.0674, "step": 2239 }, { "epoch": 0.45, "learning_rate": 0.0005971623150407896, "loss": 2.0186, "step": 2240 }, { "epoch": 0.45, "learning_rate": 0.0005968398725030459, "loss": 2.0557, "step": 2241 }, { "epoch": 0.46, "learning_rate": 0.0005965173881168037, "loss": 2.0488, "step": 2242 }, { "epoch": 0.46, "learning_rate": 0.0005961948620214223, "loss": 1.9951, "step": 2243 }, { "epoch": 0.46, "learning_rate": 0.0005958722943562782, "loss": 1.9658, "step": 2244 }, { "epoch": 0.46, "learning_rate": 0.0005955496852607661, "loss": 2.0596, "step": 2245 }, { "epoch": 0.46, "learning_rate": 0.0005952270348742991, "loss": 2.0645, "step": 2246 }, { "epoch": 0.46, "learning_rate": 0.0005949043433363072, "loss": 2.0205, "step": 2247 }, { "epoch": 0.46, "learning_rate": 0.000594581610786239, "loss": 1.9775, "step": 2248 }, { "epoch": 0.46, "learning_rate": 0.0005942588373635606, "loss": 2.0215, "step": 2249 }, { "epoch": 0.46, "learning_rate": 0.0005939360232077554, "loss": 2.1055, "step": 2250 }, { "epoch": 0.46, "learning_rate": 0.0005936131684583249, "loss": 1.9531, "step": 2251 }, { "epoch": 0.46, "learning_rate": 0.0005932902732547879, "loss": 1.998, "step": 2252 }, { "epoch": 0.46, "learning_rate": 0.0005929673377366808, "loss": 2.1279, "step": 2253 }, { "epoch": 0.46, "learning_rate": 0.0005926443620435572, "loss": 2.0664, "step": 2254 }, { "epoch": 0.46, "learning_rate": 0.0005923213463149882, "loss": 2.0117, "step": 2255 }, { "epoch": 0.46, "learning_rate": 0.0005919982906905625, "loss": 2.0645, "step": 2256 }, { "epoch": 0.46, "learning_rate": 0.0005916751953098853, "loss": 2.0361, "step": 2257 }, { "epoch": 0.46, "learning_rate": 0.0005913520603125794, "loss": 1.959, "step": 2258 }, { "epoch": 0.46, "learning_rate": 0.0005910288858382855, "loss": 2.0166, "step": 2259 }, { "epoch": 0.46, "learning_rate": 0.0005907056720266598, "loss": 2.0625, "step": 2260 }, { "epoch": 0.46, "learning_rate": 0.0005903824190173765, "loss": 2.0166, "step": 2261 }, { "epoch": 0.46, "learning_rate": 0.0005900591269501268, "loss": 1.9932, "step": 2262 }, { "epoch": 0.46, "learning_rate": 0.0005897357959646183, "loss": 2.0762, "step": 2263 }, { "epoch": 0.46, "learning_rate": 0.0005894124262005758, "loss": 2.1182, "step": 2264 }, { "epoch": 0.46, "learning_rate": 0.0005890890177977403, "loss": 2.043, "step": 2265 }, { "epoch": 0.46, "learning_rate": 0.0005887655708958704, "loss": 1.9424, "step": 2266 }, { "epoch": 0.46, "learning_rate": 0.0005884420856347405, "loss": 1.9473, "step": 2267 }, { "epoch": 0.46, "learning_rate": 0.0005881185621541419, "loss": 2.0215, "step": 2268 }, { "epoch": 0.46, "learning_rate": 0.0005877950005938827, "loss": 2.0918, "step": 2269 }, { "epoch": 0.46, "learning_rate": 0.0005874714010937871, "loss": 2.0146, "step": 2270 }, { "epoch": 0.46, "learning_rate": 0.0005871477637936955, "loss": 2.0195, "step": 2271 }, { "epoch": 0.46, "learning_rate": 0.0005868240888334653, "loss": 1.9873, "step": 2272 }, { "epoch": 0.46, "learning_rate": 0.0005865003763529694, "loss": 2.083, "step": 2273 }, { "epoch": 0.46, "learning_rate": 0.0005861766264920975, "loss": 1.9863, "step": 2274 }, { "epoch": 0.46, "learning_rate": 0.0005858528393907552, "loss": 2.0059, "step": 2275 }, { "epoch": 0.46, "learning_rate": 0.0005855290151888644, "loss": 2.0312, "step": 2276 }, { "epoch": 0.46, "learning_rate": 0.0005852051540263627, "loss": 2.0469, "step": 2277 }, { "epoch": 0.46, "learning_rate": 0.000584881256043204, "loss": 1.9844, "step": 2278 }, { "epoch": 0.46, "learning_rate": 0.0005845573213793577, "loss": 2.1035, "step": 2279 }, { "epoch": 0.46, "learning_rate": 0.0005842333501748096, "loss": 2.0625, "step": 2280 }, { "epoch": 0.46, "learning_rate": 0.0005839093425695609, "loss": 1.9668, "step": 2281 }, { "epoch": 0.46, "learning_rate": 0.0005835852987036285, "loss": 2.1484, "step": 2282 }, { "epoch": 0.46, "learning_rate": 0.0005832612187170453, "loss": 2.0791, "step": 2283 }, { "epoch": 0.46, "learning_rate": 0.0005829371027498596, "loss": 1.9785, "step": 2284 }, { "epoch": 0.46, "learning_rate": 0.0005826129509421351, "loss": 1.9893, "step": 2285 }, { "epoch": 0.46, "learning_rate": 0.0005822887634339512, "loss": 2.0889, "step": 2286 }, { "epoch": 0.46, "learning_rate": 0.0005819645403654027, "loss": 1.9629, "step": 2287 }, { "epoch": 0.46, "learning_rate": 0.0005816402818766, "loss": 2.0312, "step": 2288 }, { "epoch": 0.46, "learning_rate": 0.0005813159881076681, "loss": 1.9268, "step": 2289 }, { "epoch": 0.46, "learning_rate": 0.0005809916591987479, "loss": 2.0195, "step": 2290 }, { "epoch": 0.46, "learning_rate": 0.0005806672952899954, "loss": 1.9971, "step": 2291 }, { "epoch": 0.47, "learning_rate": 0.0005803428965215815, "loss": 2.0342, "step": 2292 }, { "epoch": 0.47, "learning_rate": 0.0005800184630336922, "loss": 1.9941, "step": 2293 }, { "epoch": 0.47, "learning_rate": 0.0005796939949665287, "loss": 2.1299, "step": 2294 }, { "epoch": 0.47, "learning_rate": 0.0005793694924603071, "loss": 2.0332, "step": 2295 }, { "epoch": 0.47, "learning_rate": 0.000579044955655258, "loss": 1.9902, "step": 2296 }, { "epoch": 0.47, "learning_rate": 0.0005787203846916272, "loss": 2.0869, "step": 2297 }, { "epoch": 0.47, "learning_rate": 0.0005783957797096755, "loss": 2.0029, "step": 2298 }, { "epoch": 0.47, "learning_rate": 0.0005780711408496777, "loss": 1.9688, "step": 2299 }, { "epoch": 0.47, "learning_rate": 0.0005777464682519239, "loss": 1.9463, "step": 2300 }, { "epoch": 0.47, "learning_rate": 0.0005774217620567183, "loss": 2.0088, "step": 2301 }, { "epoch": 0.47, "learning_rate": 0.0005770970224043802, "loss": 2.0107, "step": 2302 }, { "epoch": 0.47, "learning_rate": 0.0005767722494352422, "loss": 2.0342, "step": 2303 }, { "epoch": 0.47, "learning_rate": 0.0005764474432896528, "loss": 1.915, "step": 2304 }, { "epoch": 0.47, "learning_rate": 0.000576122604107974, "loss": 2.0039, "step": 2305 }, { "epoch": 0.47, "learning_rate": 0.0005757977320305821, "loss": 2.043, "step": 2306 }, { "epoch": 0.47, "learning_rate": 0.0005754728271978675, "loss": 2.0918, "step": 2307 }, { "epoch": 0.47, "learning_rate": 0.0005751478897502352, "loss": 1.9834, "step": 2308 }, { "epoch": 0.47, "learning_rate": 0.0005748229198281041, "loss": 1.9404, "step": 2309 }, { "epoch": 0.47, "learning_rate": 0.0005744979175719069, "loss": 1.9775, "step": 2310 }, { "epoch": 0.47, "learning_rate": 0.0005741728831220907, "loss": 2.0029, "step": 2311 }, { "epoch": 0.47, "learning_rate": 0.000573847816619116, "loss": 1.9746, "step": 2312 }, { "epoch": 0.47, "learning_rate": 0.0005735227182034577, "loss": 2.0576, "step": 2313 }, { "epoch": 0.47, "learning_rate": 0.000573197588015604, "loss": 1.999, "step": 2314 }, { "epoch": 0.47, "learning_rate": 0.0005728724261960572, "loss": 2.0215, "step": 2315 }, { "epoch": 0.47, "learning_rate": 0.0005725472328853329, "loss": 2.0391, "step": 2316 }, { "epoch": 0.47, "learning_rate": 0.0005722220082239607, "loss": 1.9932, "step": 2317 }, { "epoch": 0.47, "learning_rate": 0.0005718967523524837, "loss": 2.0234, "step": 2318 }, { "epoch": 0.47, "learning_rate": 0.0005715714654114581, "loss": 2.0068, "step": 2319 }, { "epoch": 0.47, "learning_rate": 0.000571246147541454, "loss": 1.9463, "step": 2320 }, { "epoch": 0.47, "learning_rate": 0.0005709207988830545, "loss": 2.0498, "step": 2321 }, { "epoch": 0.47, "learning_rate": 0.0005705954195768561, "loss": 2.0332, "step": 2322 }, { "epoch": 0.47, "learning_rate": 0.000570270009763469, "loss": 2.0674, "step": 2323 }, { "epoch": 0.47, "learning_rate": 0.0005699445695835155, "loss": 2.0869, "step": 2324 }, { "epoch": 0.47, "learning_rate": 0.0005696190991776323, "loss": 2.0635, "step": 2325 }, { "epoch": 0.47, "learning_rate": 0.0005692935986864685, "loss": 2.0732, "step": 2326 }, { "epoch": 0.47, "learning_rate": 0.000568968068250686, "loss": 1.9609, "step": 2327 }, { "epoch": 0.47, "learning_rate": 0.00056864250801096, "loss": 1.9893, "step": 2328 }, { "epoch": 0.47, "learning_rate": 0.0005683169181079787, "loss": 2.0312, "step": 2329 }, { "epoch": 0.47, "learning_rate": 0.0005679912986824427, "loss": 2.0273, "step": 2330 }, { "epoch": 0.47, "learning_rate": 0.0005676656498750656, "loss": 1.9863, "step": 2331 }, { "epoch": 0.47, "learning_rate": 0.0005673399718265737, "loss": 2.0166, "step": 2332 }, { "epoch": 0.47, "learning_rate": 0.0005670142646777059, "loss": 1.9766, "step": 2333 }, { "epoch": 0.47, "learning_rate": 0.0005666885285692137, "loss": 2.0186, "step": 2334 }, { "epoch": 0.47, "learning_rate": 0.0005663627636418611, "loss": 1.9658, "step": 2335 }, { "epoch": 0.47, "learning_rate": 0.0005660369700364242, "loss": 1.9658, "step": 2336 }, { "epoch": 0.47, "learning_rate": 0.0005657111478936926, "loss": 2.0078, "step": 2337 }, { "epoch": 0.47, "learning_rate": 0.0005653852973544666, "loss": 2.0986, "step": 2338 }, { "epoch": 0.47, "learning_rate": 0.0005650594185595604, "loss": 2.0, "step": 2339 }, { "epoch": 0.47, "learning_rate": 0.0005647335116497992, "loss": 2.0469, "step": 2340 }, { "epoch": 0.48, "learning_rate": 0.0005644075767660209, "loss": 2.1289, "step": 2341 }, { "epoch": 0.48, "learning_rate": 0.0005640816140490753, "loss": 1.9541, "step": 2342 }, { "epoch": 0.48, "learning_rate": 0.0005637556236398246, "loss": 2.0449, "step": 2343 }, { "epoch": 0.48, "learning_rate": 0.0005634296056791423, "loss": 2.0518, "step": 2344 }, { "epoch": 0.48, "learning_rate": 0.0005631035603079146, "loss": 1.9492, "step": 2345 }, { "epoch": 0.48, "learning_rate": 0.0005627774876670388, "loss": 2.0254, "step": 2346 }, { "epoch": 0.48, "learning_rate": 0.0005624513878974244, "loss": 2.0908, "step": 2347 }, { "epoch": 0.48, "learning_rate": 0.0005621252611399927, "loss": 1.9893, "step": 2348 }, { "epoch": 0.48, "learning_rate": 0.0005617991075356763, "loss": 2.0322, "step": 2349 }, { "epoch": 0.48, "learning_rate": 0.0005614729272254199, "loss": 2.0557, "step": 2350 }, { "epoch": 0.48, "learning_rate": 0.0005611467203501791, "loss": 2.0469, "step": 2351 }, { "epoch": 0.48, "learning_rate": 0.0005608204870509214, "loss": 1.998, "step": 2352 }, { "epoch": 0.48, "learning_rate": 0.000560494227468626, "loss": 2.125, "step": 2353 }, { "epoch": 0.48, "learning_rate": 0.0005601679417442827, "loss": 2.1152, "step": 2354 }, { "epoch": 0.48, "learning_rate": 0.0005598416300188931, "loss": 2.0352, "step": 2355 }, { "epoch": 0.48, "learning_rate": 0.00055951529243347, "loss": 2.0117, "step": 2356 }, { "epoch": 0.48, "learning_rate": 0.0005591889291290373, "loss": 2.0566, "step": 2357 }, { "epoch": 0.48, "learning_rate": 0.0005588625402466302, "loss": 1.9902, "step": 2358 }, { "epoch": 0.48, "learning_rate": 0.0005585361259272943, "loss": 2.0801, "step": 2359 }, { "epoch": 0.48, "learning_rate": 0.0005582096863120872, "loss": 2.0049, "step": 2360 }, { "epoch": 0.48, "learning_rate": 0.0005578832215420767, "loss": 1.9766, "step": 2361 }, { "epoch": 0.48, "learning_rate": 0.0005575567317583414, "loss": 1.9658, "step": 2362 }, { "epoch": 0.48, "learning_rate": 0.0005572302171019715, "loss": 1.9775, "step": 2363 }, { "epoch": 0.48, "learning_rate": 0.0005569036777140672, "loss": 2.1045, "step": 2364 }, { "epoch": 0.48, "learning_rate": 0.0005565771137357395, "loss": 2.002, "step": 2365 }, { "epoch": 0.48, "learning_rate": 0.0005562505253081103, "loss": 2.0957, "step": 2366 }, { "epoch": 0.48, "learning_rate": 0.0005559239125723119, "loss": 2.0088, "step": 2367 }, { "epoch": 0.48, "learning_rate": 0.0005555972756694869, "loss": 2.0107, "step": 2368 }, { "epoch": 0.48, "learning_rate": 0.0005552706147407888, "loss": 2.0605, "step": 2369 }, { "epoch": 0.48, "learning_rate": 0.0005549439299273814, "loss": 2.0781, "step": 2370 }, { "epoch": 0.48, "learning_rate": 0.0005546172213704383, "loss": 1.9805, "step": 2371 }, { "epoch": 0.48, "learning_rate": 0.000554290489211144, "loss": 1.9834, "step": 2372 }, { "epoch": 0.48, "learning_rate": 0.0005539637335906926, "loss": 1.9521, "step": 2373 }, { "epoch": 0.48, "learning_rate": 0.0005536369546502891, "loss": 2.1377, "step": 2374 }, { "epoch": 0.48, "learning_rate": 0.0005533101525311477, "loss": 1.9531, "step": 2375 }, { "epoch": 0.48, "learning_rate": 0.0005529833273744933, "loss": 1.9678, "step": 2376 }, { "epoch": 0.48, "learning_rate": 0.0005526564793215606, "loss": 2.0039, "step": 2377 }, { "epoch": 0.48, "learning_rate": 0.000552329608513594, "loss": 2.0166, "step": 2378 }, { "epoch": 0.48, "learning_rate": 0.0005520027150918477, "loss": 2.0547, "step": 2379 }, { "epoch": 0.48, "learning_rate": 0.000551675799197586, "loss": 2.0742, "step": 2380 }, { "epoch": 0.48, "learning_rate": 0.0005513488609720827, "loss": 2.0811, "step": 2381 }, { "epoch": 0.48, "learning_rate": 0.0005510219005566214, "loss": 2.0918, "step": 2382 }, { "epoch": 0.48, "learning_rate": 0.0005506949180924947, "loss": 1.9629, "step": 2383 }, { "epoch": 0.48, "learning_rate": 0.0005503679137210057, "loss": 2.0547, "step": 2384 }, { "epoch": 0.48, "learning_rate": 0.0005500408875834664, "loss": 2.043, "step": 2385 }, { "epoch": 0.48, "learning_rate": 0.0005497138398211979, "loss": 1.9678, "step": 2386 }, { "epoch": 0.48, "learning_rate": 0.0005493867705755312, "loss": 2.0615, "step": 2387 }, { "epoch": 0.48, "learning_rate": 0.0005490596799878067, "loss": 2.1045, "step": 2388 }, { "epoch": 0.48, "learning_rate": 0.0005487325681993732, "loss": 1.9668, "step": 2389 }, { "epoch": 0.49, "learning_rate": 0.0005484054353515896, "loss": 2.0293, "step": 2390 }, { "epoch": 0.49, "learning_rate": 0.0005480782815858233, "loss": 2.0381, "step": 2391 }, { "epoch": 0.49, "learning_rate": 0.0005477511070434509, "loss": 1.9727, "step": 2392 }, { "epoch": 0.49, "learning_rate": 0.0005474239118658579, "loss": 2.0225, "step": 2393 }, { "epoch": 0.49, "learning_rate": 0.0005470966961944392, "loss": 2.0537, "step": 2394 }, { "epoch": 0.49, "learning_rate": 0.0005467694601705977, "loss": 2.0557, "step": 2395 }, { "epoch": 0.49, "learning_rate": 0.000546442203935746, "loss": 2.043, "step": 2396 }, { "epoch": 0.49, "learning_rate": 0.0005461149276313046, "loss": 2.0156, "step": 2397 }, { "epoch": 0.49, "learning_rate": 0.0005457876313987033, "loss": 1.9912, "step": 2398 }, { "epoch": 0.49, "learning_rate": 0.0005454603153793804, "loss": 1.9873, "step": 2399 }, { "epoch": 0.49, "learning_rate": 0.0005451329797147822, "loss": 2.0547, "step": 2400 }, { "epoch": 0.49, "learning_rate": 0.0005448056245463644, "loss": 2.0107, "step": 2401 }, { "epoch": 0.49, "learning_rate": 0.0005444782500155904, "loss": 2.0225, "step": 2402 }, { "epoch": 0.49, "learning_rate": 0.0005441508562639322, "loss": 1.9775, "step": 2403 }, { "epoch": 0.49, "learning_rate": 0.0005438234434328704, "loss": 1.9922, "step": 2404 }, { "epoch": 0.49, "learning_rate": 0.0005434960116638932, "loss": 2.0664, "step": 2405 }, { "epoch": 0.49, "learning_rate": 0.0005431685610984976, "loss": 1.9971, "step": 2406 }, { "epoch": 0.49, "learning_rate": 0.0005428410918781884, "loss": 1.9717, "step": 2407 }, { "epoch": 0.49, "learning_rate": 0.0005425136041444786, "loss": 1.9717, "step": 2408 }, { "epoch": 0.49, "learning_rate": 0.0005421860980388892, "loss": 2.0234, "step": 2409 }, { "epoch": 0.49, "learning_rate": 0.0005418585737029489, "loss": 1.9727, "step": 2410 }, { "epoch": 0.49, "learning_rate": 0.0005415310312781943, "loss": 1.9863, "step": 2411 }, { "epoch": 0.49, "learning_rate": 0.0005412034709061705, "loss": 1.9912, "step": 2412 }, { "epoch": 0.49, "learning_rate": 0.0005408758927284294, "loss": 1.9316, "step": 2413 }, { "epoch": 0.49, "learning_rate": 0.0005405482968865312, "loss": 2.0098, "step": 2414 }, { "epoch": 0.49, "learning_rate": 0.0005402206835220435, "loss": 2.0059, "step": 2415 }, { "epoch": 0.49, "learning_rate": 0.0005398930527765415, "loss": 1.9893, "step": 2416 }, { "epoch": 0.49, "learning_rate": 0.0005395654047916082, "loss": 1.9473, "step": 2417 }, { "epoch": 0.49, "learning_rate": 0.0005392377397088334, "loss": 2.041, "step": 2418 }, { "epoch": 0.49, "learning_rate": 0.0005389100576698149, "loss": 1.9648, "step": 2419 }, { "epoch": 0.49, "learning_rate": 0.0005385823588161577, "loss": 1.9619, "step": 2420 }, { "epoch": 0.49, "learning_rate": 0.0005382546432894737, "loss": 2.1045, "step": 2421 }, { "epoch": 0.49, "learning_rate": 0.0005379269112313823, "loss": 2.0215, "step": 2422 }, { "epoch": 0.49, "learning_rate": 0.0005375991627835103, "loss": 1.9697, "step": 2423 }, { "epoch": 0.49, "learning_rate": 0.0005372713980874913, "loss": 2.0342, "step": 2424 }, { "epoch": 0.49, "learning_rate": 0.0005369436172849657, "loss": 1.9727, "step": 2425 }, { "epoch": 0.49, "learning_rate": 0.000536615820517581, "loss": 2.0576, "step": 2426 }, { "epoch": 0.49, "learning_rate": 0.0005362880079269921, "loss": 1.9473, "step": 2427 }, { "epoch": 0.49, "learning_rate": 0.00053596017965486, "loss": 2.041, "step": 2428 }, { "epoch": 0.49, "learning_rate": 0.0005356323358428528, "loss": 1.9424, "step": 2429 }, { "epoch": 0.49, "learning_rate": 0.0005353044766326455, "loss": 2.0342, "step": 2430 }, { "epoch": 0.49, "learning_rate": 0.0005349766021659195, "loss": 1.9502, "step": 2431 }, { "epoch": 0.49, "learning_rate": 0.0005346487125843627, "loss": 1.9775, "step": 2432 }, { "epoch": 0.49, "learning_rate": 0.00053432080802967, "loss": 1.9199, "step": 2433 }, { "epoch": 0.49, "learning_rate": 0.0005339928886435423, "loss": 2.0977, "step": 2434 }, { "epoch": 0.49, "learning_rate": 0.0005336649545676869, "loss": 2.082, "step": 2435 }, { "epoch": 0.49, "learning_rate": 0.0005333370059438179, "loss": 1.9824, "step": 2436 }, { "epoch": 0.49, "learning_rate": 0.0005330090429136552, "loss": 2.0381, "step": 2437 }, { "epoch": 0.49, "learning_rate": 0.0005326810656189254, "loss": 2.0186, "step": 2438 }, { "epoch": 0.5, "learning_rate": 0.0005323530742013608, "loss": 1.9434, "step": 2439 }, { "epoch": 0.5, "learning_rate": 0.0005320250688027, "loss": 2.0215, "step": 2440 }, { "epoch": 0.5, "learning_rate": 0.0005316970495646878, "loss": 2.0859, "step": 2441 }, { "epoch": 0.5, "learning_rate": 0.0005313690166290746, "loss": 1.9941, "step": 2442 }, { "epoch": 0.5, "learning_rate": 0.000531040970137617, "loss": 2.0029, "step": 2443 }, { "epoch": 0.5, "learning_rate": 0.0005307129102320777, "loss": 2.0098, "step": 2444 }, { "epoch": 0.5, "learning_rate": 0.0005303848370542246, "loss": 2.0508, "step": 2445 }, { "epoch": 0.5, "learning_rate": 0.0005300567507458315, "loss": 2.041, "step": 2446 }, { "epoch": 0.5, "learning_rate": 0.0005297286514486785, "loss": 1.9814, "step": 2447 }, { "epoch": 0.5, "learning_rate": 0.0005294005393045502, "loss": 2.0312, "step": 2448 }, { "epoch": 0.5, "learning_rate": 0.0005290724144552379, "loss": 1.9873, "step": 2449 }, { "epoch": 0.5, "learning_rate": 0.0005287442770425377, "loss": 2.0361, "step": 2450 }, { "epoch": 0.5, "learning_rate": 0.0005284161272082513, "loss": 1.8965, "step": 2451 }, { "epoch": 0.5, "learning_rate": 0.0005280879650941857, "loss": 2.0898, "step": 2452 }, { "epoch": 0.5, "learning_rate": 0.0005277597908421531, "loss": 1.9492, "step": 2453 }, { "epoch": 0.5, "learning_rate": 0.0005274316045939714, "loss": 1.9316, "step": 2454 }, { "epoch": 0.5, "learning_rate": 0.0005271034064914634, "loss": 2.0137, "step": 2455 }, { "epoch": 0.5, "learning_rate": 0.0005267751966764569, "loss": 1.9707, "step": 2456 }, { "epoch": 0.5, "learning_rate": 0.0005264469752907848, "loss": 2.0205, "step": 2457 }, { "epoch": 0.5, "learning_rate": 0.0005261187424762854, "loss": 1.9834, "step": 2458 }, { "epoch": 0.5, "learning_rate": 0.0005257904983748013, "loss": 1.999, "step": 2459 }, { "epoch": 0.5, "learning_rate": 0.0005254622431281804, "loss": 2.0439, "step": 2460 }, { "epoch": 0.5, "learning_rate": 0.0005251339768782753, "loss": 1.916, "step": 2461 }, { "epoch": 0.5, "learning_rate": 0.0005248056997669434, "loss": 1.9844, "step": 2462 }, { "epoch": 0.5, "learning_rate": 0.0005244774119360468, "loss": 1.918, "step": 2463 }, { "epoch": 0.5, "learning_rate": 0.0005241491135274521, "loss": 2.0, "step": 2464 }, { "epoch": 0.5, "learning_rate": 0.0005238208046830307, "loss": 2.0088, "step": 2465 }, { "epoch": 0.5, "learning_rate": 0.0005234924855446584, "loss": 2.043, "step": 2466 }, { "epoch": 0.5, "learning_rate": 0.000523164156254215, "loss": 1.9482, "step": 2467 }, { "epoch": 0.5, "learning_rate": 0.0005228358169535856, "loss": 2.002, "step": 2468 }, { "epoch": 0.5, "learning_rate": 0.000522507467784659, "loss": 2.0674, "step": 2469 }, { "epoch": 0.5, "learning_rate": 0.0005221791088893282, "loss": 1.9971, "step": 2470 }, { "epoch": 0.5, "learning_rate": 0.000521850740409491, "loss": 2.0059, "step": 2471 }, { "epoch": 0.5, "learning_rate": 0.0005215223624870487, "loss": 2.0332, "step": 2472 }, { "epoch": 0.5, "learning_rate": 0.0005211939752639067, "loss": 2.0264, "step": 2473 }, { "epoch": 0.5, "learning_rate": 0.0005208655788819751, "loss": 1.9824, "step": 2474 }, { "epoch": 0.5, "learning_rate": 0.0005205371734831675, "loss": 2.0234, "step": 2475 }, { "epoch": 0.5, "learning_rate": 0.000520208759209401, "loss": 1.9795, "step": 2476 }, { "epoch": 0.5, "learning_rate": 0.000519880336202597, "loss": 1.9854, "step": 2477 }, { "epoch": 0.5, "learning_rate": 0.0005195519046046808, "loss": 2.0635, "step": 2478 }, { "epoch": 0.5, "learning_rate": 0.0005192234645575814, "loss": 2.0215, "step": 2479 }, { "epoch": 0.5, "learning_rate": 0.0005188950162032309, "loss": 1.9307, "step": 2480 }, { "epoch": 0.5, "learning_rate": 0.0005185665596835655, "loss": 2.0176, "step": 2481 }, { "epoch": 0.5, "learning_rate": 0.0005182380951405249, "loss": 2.0312, "step": 2482 }, { "epoch": 0.5, "learning_rate": 0.000517909622716052, "loss": 2.0381, "step": 2483 }, { "epoch": 0.5, "learning_rate": 0.0005175811425520933, "loss": 2.0781, "step": 2484 }, { "epoch": 0.5, "learning_rate": 0.0005172526547905987, "loss": 1.9307, "step": 2485 }, { "epoch": 0.5, "learning_rate": 0.0005169241595735213, "loss": 1.9912, "step": 2486 }, { "epoch": 0.5, "learning_rate": 0.0005165956570428172, "loss": 1.9766, "step": 2487 }, { "epoch": 0.5, "learning_rate": 0.0005162671473404461, "loss": 2.0254, "step": 2488 }, { "epoch": 0.51, "learning_rate": 0.0005159386306083705, "loss": 1.9795, "step": 2489 }, { "epoch": 0.51, "learning_rate": 0.0005156101069885563, "loss": 1.9834, "step": 2490 }, { "epoch": 0.51, "learning_rate": 0.0005152815766229714, "loss": 2.1152, "step": 2491 }, { "epoch": 0.51, "learning_rate": 0.0005149530396535879, "loss": 1.9971, "step": 2492 }, { "epoch": 0.51, "learning_rate": 0.0005146244962223799, "loss": 2.0586, "step": 2493 }, { "epoch": 0.51, "learning_rate": 0.0005142959464713247, "loss": 2.0264, "step": 2494 }, { "epoch": 0.51, "learning_rate": 0.0005139673905424021, "loss": 2.0186, "step": 2495 }, { "epoch": 0.51, "learning_rate": 0.0005136388285775946, "loss": 1.9893, "step": 2496 }, { "epoch": 0.51, "learning_rate": 0.0005133102607188875, "loss": 2.1211, "step": 2497 }, { "epoch": 0.51, "learning_rate": 0.0005129816871082683, "loss": 1.9775, "step": 2498 }, { "epoch": 0.51, "learning_rate": 0.0005126531078877274, "loss": 1.9717, "step": 2499 }, { "epoch": 0.51, "learning_rate": 0.0005123245231992573, "loss": 1.9199, "step": 2500 }, { "epoch": 0.51, "learning_rate": 0.0005119959331848531, "loss": 1.9912, "step": 2501 }, { "epoch": 0.51, "learning_rate": 0.0005116673379865119, "loss": 1.9531, "step": 2502 }, { "epoch": 0.51, "learning_rate": 0.0005113387377462334, "loss": 1.9492, "step": 2503 }, { "epoch": 0.51, "learning_rate": 0.0005110101326060192, "loss": 2.1455, "step": 2504 }, { "epoch": 0.51, "learning_rate": 0.0005106815227078729, "loss": 2.1533, "step": 2505 }, { "epoch": 0.51, "learning_rate": 0.000510352908193801, "loss": 1.9785, "step": 2506 }, { "epoch": 0.51, "learning_rate": 0.0005100242892058108, "loss": 1.9795, "step": 2507 }, { "epoch": 0.51, "learning_rate": 0.0005096956658859122, "loss": 2.0674, "step": 2508 }, { "epoch": 0.51, "learning_rate": 0.0005093670383761171, "loss": 1.9873, "step": 2509 }, { "epoch": 0.51, "learning_rate": 0.0005090384068184387, "loss": 2.0791, "step": 2510 }, { "epoch": 0.51, "learning_rate": 0.0005087097713548926, "loss": 2.0088, "step": 2511 }, { "epoch": 0.51, "learning_rate": 0.0005083811321274953, "loss": 2.0479, "step": 2512 }, { "epoch": 0.51, "learning_rate": 0.0005080524892782655, "loss": 2.1641, "step": 2513 }, { "epoch": 0.51, "learning_rate": 0.0005077238429492236, "loss": 1.8809, "step": 2514 }, { "epoch": 0.51, "learning_rate": 0.0005073951932823907, "loss": 2.0283, "step": 2515 }, { "epoch": 0.51, "learning_rate": 0.0005070665404197903, "loss": 1.9551, "step": 2516 }, { "epoch": 0.51, "learning_rate": 0.0005067378845034467, "loss": 2.0352, "step": 2517 }, { "epoch": 0.51, "learning_rate": 0.0005064092256753856, "loss": 2.084, "step": 2518 }, { "epoch": 0.51, "learning_rate": 0.000506080564077634, "loss": 1.9961, "step": 2519 }, { "epoch": 0.51, "learning_rate": 0.0005057518998522202, "loss": 1.9199, "step": 2520 }, { "epoch": 0.51, "learning_rate": 0.0005054232331411735, "loss": 2.0205, "step": 2521 }, { "epoch": 0.51, "learning_rate": 0.0005050945640865244, "loss": 1.8857, "step": 2522 }, { "epoch": 0.51, "learning_rate": 0.0005047658928303043, "loss": 1.9902, "step": 2523 }, { "epoch": 0.51, "learning_rate": 0.0005044372195145455, "loss": 2.0029, "step": 2524 }, { "epoch": 0.51, "learning_rate": 0.0005041085442812815, "loss": 2.0615, "step": 2525 }, { "epoch": 0.51, "learning_rate": 0.0005037798672725461, "loss": 1.9492, "step": 2526 }, { "epoch": 0.51, "learning_rate": 0.0005034511886303746, "loss": 1.9941, "step": 2527 }, { "epoch": 0.51, "learning_rate": 0.000503122508496802, "loss": 2.0498, "step": 2528 }, { "epoch": 0.51, "learning_rate": 0.000502793827013865, "loss": 1.9785, "step": 2529 }, { "epoch": 0.51, "learning_rate": 0.0005024651443236003, "loss": 2.0537, "step": 2530 }, { "epoch": 0.51, "learning_rate": 0.000502136460568045, "loss": 1.9639, "step": 2531 }, { "epoch": 0.51, "learning_rate": 0.0005018077758892372, "loss": 2.0283, "step": 2532 }, { "epoch": 0.51, "learning_rate": 0.0005014790904292149, "loss": 2.0635, "step": 2533 }, { "epoch": 0.51, "learning_rate": 0.0005011504043300167, "loss": 1.9355, "step": 2534 }, { "epoch": 0.51, "learning_rate": 0.0005008217177336817, "loss": 1.9795, "step": 2535 }, { "epoch": 0.51, "learning_rate": 0.0005004930307822481, "loss": 2.1133, "step": 2536 }, { "epoch": 0.51, "learning_rate": 0.000500164343617756, "loss": 2.043, "step": 2537 }, { "epoch": 0.52, "learning_rate": 0.000499835656382244, "loss": 1.9775, "step": 2538 }, { "epoch": 0.52, "learning_rate": 0.0004995069692177519, "loss": 1.9316, "step": 2539 }, { "epoch": 0.52, "learning_rate": 0.0004991782822663186, "loss": 2.0068, "step": 2540 }, { "epoch": 0.52, "learning_rate": 0.0004988495956699832, "loss": 1.9658, "step": 2541 }, { "epoch": 0.52, "learning_rate": 0.0004985209095707851, "loss": 1.9951, "step": 2542 }, { "epoch": 0.52, "learning_rate": 0.0004981922241107629, "loss": 1.8896, "step": 2543 }, { "epoch": 0.52, "learning_rate": 0.0004978635394319549, "loss": 2.0547, "step": 2544 }, { "epoch": 0.52, "learning_rate": 0.0004975348556763999, "loss": 1.9326, "step": 2545 }, { "epoch": 0.52, "learning_rate": 0.0004972061729861351, "loss": 2.0342, "step": 2546 }, { "epoch": 0.52, "learning_rate": 0.0004968774915031981, "loss": 1.9502, "step": 2547 }, { "epoch": 0.52, "learning_rate": 0.0004965488113696257, "loss": 2.0498, "step": 2548 }, { "epoch": 0.52, "learning_rate": 0.000496220132727454, "loss": 1.9971, "step": 2549 }, { "epoch": 0.52, "learning_rate": 0.0004958914557187187, "loss": 2.0625, "step": 2550 }, { "epoch": 0.52, "learning_rate": 0.0004955627804854544, "loss": 1.9648, "step": 2551 }, { "epoch": 0.52, "learning_rate": 0.0004952341071696959, "loss": 2.0283, "step": 2552 }, { "epoch": 0.52, "learning_rate": 0.0004949054359134757, "loss": 2.0557, "step": 2553 }, { "epoch": 0.52, "learning_rate": 0.0004945767668588264, "loss": 1.9893, "step": 2554 }, { "epoch": 0.52, "learning_rate": 0.0004942481001477799, "loss": 2.0244, "step": 2555 }, { "epoch": 0.52, "learning_rate": 0.0004939194359223662, "loss": 1.9932, "step": 2556 }, { "epoch": 0.52, "learning_rate": 0.0004935907743246145, "loss": 1.9189, "step": 2557 }, { "epoch": 0.52, "learning_rate": 0.0004932621154965534, "loss": 1.9561, "step": 2558 }, { "epoch": 0.52, "learning_rate": 0.0004929334595802098, "loss": 2.0254, "step": 2559 }, { "epoch": 0.52, "learning_rate": 0.0004926048067176093, "loss": 1.8916, "step": 2560 }, { "epoch": 0.52, "learning_rate": 0.0004922761570507765, "loss": 2.002, "step": 2561 }, { "epoch": 0.52, "learning_rate": 0.0004919475107217346, "loss": 2.002, "step": 2562 }, { "epoch": 0.52, "learning_rate": 0.0004916188678725049, "loss": 2.0635, "step": 2563 }, { "epoch": 0.52, "learning_rate": 0.0004912902286451074, "loss": 2.0469, "step": 2564 }, { "epoch": 0.52, "learning_rate": 0.0004909615931815613, "loss": 2.0732, "step": 2565 }, { "epoch": 0.52, "learning_rate": 0.0004906329616238831, "loss": 1.9092, "step": 2566 }, { "epoch": 0.52, "learning_rate": 0.0004903043341140879, "loss": 1.9873, "step": 2567 }, { "epoch": 0.52, "learning_rate": 0.0004899757107941894, "loss": 2.0225, "step": 2568 }, { "epoch": 0.52, "learning_rate": 0.0004896470918061992, "loss": 2.0361, "step": 2569 }, { "epoch": 0.52, "learning_rate": 0.000489318477292127, "loss": 2.0059, "step": 2570 }, { "epoch": 0.52, "learning_rate": 0.000488989867393981, "loss": 2.0479, "step": 2571 }, { "epoch": 0.52, "learning_rate": 0.0004886612622537668, "loss": 1.9541, "step": 2572 }, { "epoch": 0.52, "learning_rate": 0.0004883326620134881, "loss": 2.0127, "step": 2573 }, { "epoch": 0.52, "learning_rate": 0.0004880040668151471, "loss": 1.9717, "step": 2574 }, { "epoch": 0.52, "learning_rate": 0.0004876754768007428, "loss": 2.0371, "step": 2575 }, { "epoch": 0.52, "learning_rate": 0.0004873468921122726, "loss": 2.0195, "step": 2576 }, { "epoch": 0.52, "learning_rate": 0.0004870183128917318, "loss": 2.082, "step": 2577 }, { "epoch": 0.52, "learning_rate": 0.0004866897392811127, "loss": 1.9688, "step": 2578 }, { "epoch": 0.52, "learning_rate": 0.0004863611714224054, "loss": 1.9697, "step": 2579 }, { "epoch": 0.52, "learning_rate": 0.000486032609457598, "loss": 2.1094, "step": 2580 }, { "epoch": 0.52, "learning_rate": 0.00048570405352867536, "loss": 2.0508, "step": 2581 }, { "epoch": 0.52, "learning_rate": 0.0004853755037776202, "loss": 2.0381, "step": 2582 }, { "epoch": 0.52, "learning_rate": 0.00048504696034641214, "loss": 1.9492, "step": 2583 }, { "epoch": 0.52, "learning_rate": 0.0004847184233770288, "loss": 1.9492, "step": 2584 }, { "epoch": 0.52, "learning_rate": 0.000484389893011444, "loss": 1.9639, "step": 2585 }, { "epoch": 0.52, "learning_rate": 0.0004840613693916294, "loss": 1.9004, "step": 2586 }, { "epoch": 0.53, "learning_rate": 0.0004837328526595539, "loss": 2.001, "step": 2587 }, { "epoch": 0.53, "learning_rate": 0.00048340434295718283, "loss": 2.0293, "step": 2588 }, { "epoch": 0.53, "learning_rate": 0.00048307584042647875, "loss": 1.9902, "step": 2589 }, { "epoch": 0.53, "learning_rate": 0.0004827473452094013, "loss": 1.8955, "step": 2590 }, { "epoch": 0.53, "learning_rate": 0.0004824188574479067, "loss": 1.9873, "step": 2591 }, { "epoch": 0.53, "learning_rate": 0.0004820903772839481, "loss": 2.0176, "step": 2592 }, { "epoch": 0.53, "learning_rate": 0.0004817619048594752, "loss": 1.9395, "step": 2593 }, { "epoch": 0.53, "learning_rate": 0.0004814334403164346, "loss": 1.9844, "step": 2594 }, { "epoch": 0.53, "learning_rate": 0.00048110498379676914, "loss": 2.085, "step": 2595 }, { "epoch": 0.53, "learning_rate": 0.0004807765354424186, "loss": 2.0156, "step": 2596 }, { "epoch": 0.53, "learning_rate": 0.0004804480953953192, "loss": 2.0742, "step": 2597 }, { "epoch": 0.53, "learning_rate": 0.00048011966379740305, "loss": 1.9951, "step": 2598 }, { "epoch": 0.53, "learning_rate": 0.00047979124079059913, "loss": 1.9736, "step": 2599 }, { "epoch": 0.53, "learning_rate": 0.00047946282651683273, "loss": 2.1328, "step": 2600 }, { "epoch": 0.53, "learning_rate": 0.000479134421118025, "loss": 2.0225, "step": 2601 }, { "epoch": 0.53, "learning_rate": 0.00047880602473609324, "loss": 1.9219, "step": 2602 }, { "epoch": 0.53, "learning_rate": 0.00047847763751295144, "loss": 1.9033, "step": 2603 }, { "epoch": 0.53, "learning_rate": 0.00047814925959050917, "loss": 2.1104, "step": 2604 }, { "epoch": 0.53, "learning_rate": 0.00047782089111067175, "loss": 1.9268, "step": 2605 }, { "epoch": 0.53, "learning_rate": 0.00047749253221534107, "loss": 1.9814, "step": 2606 }, { "epoch": 0.53, "learning_rate": 0.0004771641830464146, "loss": 2.0107, "step": 2607 }, { "epoch": 0.53, "learning_rate": 0.00047683584374578504, "loss": 2.0156, "step": 2608 }, { "epoch": 0.53, "learning_rate": 0.00047650751445534175, "loss": 2.04, "step": 2609 }, { "epoch": 0.53, "learning_rate": 0.0004761791953169694, "loss": 2.0244, "step": 2610 }, { "epoch": 0.53, "learning_rate": 0.0004758508864725478, "loss": 1.9893, "step": 2611 }, { "epoch": 0.53, "learning_rate": 0.00047552258806395325, "loss": 2.0596, "step": 2612 }, { "epoch": 0.53, "learning_rate": 0.00047519430023305664, "loss": 1.9248, "step": 2613 }, { "epoch": 0.53, "learning_rate": 0.0004748660231217248, "loss": 2.1367, "step": 2614 }, { "epoch": 0.53, "learning_rate": 0.00047453775687181964, "loss": 1.9619, "step": 2615 }, { "epoch": 0.53, "learning_rate": 0.00047420950162519875, "loss": 1.9668, "step": 2616 }, { "epoch": 0.53, "learning_rate": 0.00047388125752371475, "loss": 2.0186, "step": 2617 }, { "epoch": 0.53, "learning_rate": 0.0004735530247092151, "loss": 2.0107, "step": 2618 }, { "epoch": 0.53, "learning_rate": 0.0004732248033235432, "loss": 1.9824, "step": 2619 }, { "epoch": 0.53, "learning_rate": 0.0004728965935085367, "loss": 1.9277, "step": 2620 }, { "epoch": 0.53, "learning_rate": 0.00047256839540602854, "loss": 2.0283, "step": 2621 }, { "epoch": 0.53, "learning_rate": 0.000472240209157847, "loss": 1.9707, "step": 2622 }, { "epoch": 0.53, "learning_rate": 0.0004719120349058145, "loss": 2.041, "step": 2623 }, { "epoch": 0.53, "learning_rate": 0.00047158387279174873, "loss": 2.042, "step": 2624 }, { "epoch": 0.53, "learning_rate": 0.0004712557229574624, "loss": 1.9814, "step": 2625 }, { "epoch": 0.53, "learning_rate": 0.0004709275855447621, "loss": 1.9805, "step": 2626 }, { "epoch": 0.53, "learning_rate": 0.00047059946069544966, "loss": 1.9805, "step": 2627 }, { "epoch": 0.53, "learning_rate": 0.00047027134855132167, "loss": 2.0391, "step": 2628 }, { "epoch": 0.53, "learning_rate": 0.00046994324925416864, "loss": 2.0332, "step": 2629 }, { "epoch": 0.53, "learning_rate": 0.0004696151629457756, "loss": 1.998, "step": 2630 }, { "epoch": 0.53, "learning_rate": 0.0004692870897679224, "loss": 1.9893, "step": 2631 }, { "epoch": 0.53, "learning_rate": 0.00046895902986238304, "loss": 2.0615, "step": 2632 }, { "epoch": 0.53, "learning_rate": 0.00046863098337092553, "loss": 2.0254, "step": 2633 }, { "epoch": 0.53, "learning_rate": 0.0004683029504353123, "loss": 2.1318, "step": 2634 }, { "epoch": 0.53, "learning_rate": 0.0004679749311973001, "loss": 1.9883, "step": 2635 }, { "epoch": 0.54, "learning_rate": 0.0004676469257986394, "loss": 2.0078, "step": 2636 }, { "epoch": 0.54, "learning_rate": 0.0004673189343810747, "loss": 1.9746, "step": 2637 }, { "epoch": 0.54, "learning_rate": 0.00046699095708634483, "loss": 2.0498, "step": 2638 }, { "epoch": 0.54, "learning_rate": 0.0004666629940561823, "loss": 1.8848, "step": 2639 }, { "epoch": 0.54, "learning_rate": 0.0004663350454323132, "loss": 2.0146, "step": 2640 }, { "epoch": 0.54, "learning_rate": 0.0004660071113564579, "loss": 2.0254, "step": 2641 }, { "epoch": 0.54, "learning_rate": 0.0004656791919703302, "loss": 1.9619, "step": 2642 }, { "epoch": 0.54, "learning_rate": 0.00046535128741563727, "loss": 1.9424, "step": 2643 }, { "epoch": 0.54, "learning_rate": 0.0004650233978340805, "loss": 1.9629, "step": 2644 }, { "epoch": 0.54, "learning_rate": 0.0004646955233673546, "loss": 1.9902, "step": 2645 }, { "epoch": 0.54, "learning_rate": 0.0004643676641571472, "loss": 1.9961, "step": 2646 }, { "epoch": 0.54, "learning_rate": 0.00046403982034514015, "loss": 1.9668, "step": 2647 }, { "epoch": 0.54, "learning_rate": 0.00046371199207300795, "loss": 2.0918, "step": 2648 }, { "epoch": 0.54, "learning_rate": 0.00046338417948241906, "loss": 1.9385, "step": 2649 }, { "epoch": 0.54, "learning_rate": 0.0004630563827150344, "loss": 2.001, "step": 2650 }, { "epoch": 0.54, "learning_rate": 0.00046272860191250875, "loss": 1.9688, "step": 2651 }, { "epoch": 0.54, "learning_rate": 0.00046240083721648973, "loss": 1.9355, "step": 2652 }, { "epoch": 0.54, "learning_rate": 0.00046207308876861764, "loss": 1.917, "step": 2653 }, { "epoch": 0.54, "learning_rate": 0.0004617453567105264, "loss": 2.0459, "step": 2654 }, { "epoch": 0.54, "learning_rate": 0.00046141764118384256, "loss": 2.0615, "step": 2655 }, { "epoch": 0.54, "learning_rate": 0.0004610899423301851, "loss": 2.0566, "step": 2656 }, { "epoch": 0.54, "learning_rate": 0.00046076226029116674, "loss": 2.0381, "step": 2657 }, { "epoch": 0.54, "learning_rate": 0.000460434595208392, "loss": 1.9893, "step": 2658 }, { "epoch": 0.54, "learning_rate": 0.0004601069472234584, "loss": 1.9834, "step": 2659 }, { "epoch": 0.54, "learning_rate": 0.0004597793164779566, "loss": 2.0273, "step": 2660 }, { "epoch": 0.54, "learning_rate": 0.0004594517031134689, "loss": 1.9824, "step": 2661 }, { "epoch": 0.54, "learning_rate": 0.0004591241072715706, "loss": 1.9775, "step": 2662 }, { "epoch": 0.54, "learning_rate": 0.0004587965290938296, "loss": 1.998, "step": 2663 }, { "epoch": 0.54, "learning_rate": 0.00045846896872180575, "loss": 1.9541, "step": 2664 }, { "epoch": 0.54, "learning_rate": 0.00045814142629705133, "loss": 2.041, "step": 2665 }, { "epoch": 0.54, "learning_rate": 0.0004578139019611109, "loss": 1.9551, "step": 2666 }, { "epoch": 0.54, "learning_rate": 0.00045748639585552143, "loss": 2.0791, "step": 2667 }, { "epoch": 0.54, "learning_rate": 0.0004571589081218116, "loss": 2.0918, "step": 2668 }, { "epoch": 0.54, "learning_rate": 0.00045683143890150237, "loss": 1.8779, "step": 2669 }, { "epoch": 0.54, "learning_rate": 0.00045650398833610683, "loss": 1.8643, "step": 2670 }, { "epoch": 0.54, "learning_rate": 0.0004561765565671297, "loss": 1.9961, "step": 2671 }, { "epoch": 0.54, "learning_rate": 0.0004558491437360677, "loss": 1.9209, "step": 2672 }, { "epoch": 0.54, "learning_rate": 0.0004555217499844097, "loss": 2.0791, "step": 2673 }, { "epoch": 0.54, "learning_rate": 0.0004551943754536358, "loss": 1.999, "step": 2674 }, { "epoch": 0.54, "learning_rate": 0.0004548670202852178, "loss": 2.0361, "step": 2675 }, { "epoch": 0.54, "learning_rate": 0.0004545396846206198, "loss": 2.0166, "step": 2676 }, { "epoch": 0.54, "learning_rate": 0.00045421236860129685, "loss": 1.9873, "step": 2677 }, { "epoch": 0.54, "learning_rate": 0.00045388507236869546, "loss": 1.9922, "step": 2678 }, { "epoch": 0.54, "learning_rate": 0.00045355779606425406, "loss": 1.9834, "step": 2679 }, { "epoch": 0.54, "learning_rate": 0.00045323053982940237, "loss": 2.002, "step": 2680 }, { "epoch": 0.54, "learning_rate": 0.0004529033038055609, "loss": 1.7871, "step": 2681 }, { "epoch": 0.54, "learning_rate": 0.0004525760881341421, "loss": 1.9414, "step": 2682 }, { "epoch": 0.54, "learning_rate": 0.00045224889295654924, "loss": 1.9717, "step": 2683 }, { "epoch": 0.54, "learning_rate": 0.00045192171841417693, "loss": 1.9561, "step": 2684 }, { "epoch": 0.54, "learning_rate": 0.0004515945646484105, "loss": 1.9551, "step": 2685 }, { "epoch": 0.55, "learning_rate": 0.0004512674318006268, "loss": 1.9346, "step": 2686 }, { "epoch": 0.55, "learning_rate": 0.00045094032001219355, "loss": 1.8955, "step": 2687 }, { "epoch": 0.55, "learning_rate": 0.0004506132294244687, "loss": 1.9209, "step": 2688 }, { "epoch": 0.55, "learning_rate": 0.0004502861601788022, "loss": 2.0293, "step": 2689 }, { "epoch": 0.55, "learning_rate": 0.00044995911241653387, "loss": 1.9404, "step": 2690 }, { "epoch": 0.55, "learning_rate": 0.00044963208627899425, "loss": 1.9336, "step": 2691 }, { "epoch": 0.55, "learning_rate": 0.00044930508190750535, "loss": 1.9111, "step": 2692 }, { "epoch": 0.55, "learning_rate": 0.0004489780994433788, "loss": 2.0479, "step": 2693 }, { "epoch": 0.55, "learning_rate": 0.0004486511390279172, "loss": 1.9287, "step": 2694 }, { "epoch": 0.55, "learning_rate": 0.00044832420080241403, "loss": 1.916, "step": 2695 }, { "epoch": 0.55, "learning_rate": 0.00044799728490815235, "loss": 1.9854, "step": 2696 }, { "epoch": 0.55, "learning_rate": 0.00044767039148640596, "loss": 2.0479, "step": 2697 }, { "epoch": 0.55, "learning_rate": 0.00044734352067843944, "loss": 1.96, "step": 2698 }, { "epoch": 0.55, "learning_rate": 0.0004470166726255068, "loss": 1.9463, "step": 2699 }, { "epoch": 0.55, "learning_rate": 0.0004466898474688524, "loss": 2.04, "step": 2700 }, { "epoch": 0.55, "learning_rate": 0.000446363045349711, "loss": 1.9561, "step": 2701 }, { "epoch": 0.55, "learning_rate": 0.0004460362664093075, "loss": 2.001, "step": 2702 }, { "epoch": 0.55, "learning_rate": 0.00044570951078885617, "loss": 1.9629, "step": 2703 }, { "epoch": 0.55, "learning_rate": 0.0004453827786295617, "loss": 1.9336, "step": 2704 }, { "epoch": 0.55, "learning_rate": 0.00044505607007261865, "loss": 2.0088, "step": 2705 }, { "epoch": 0.55, "learning_rate": 0.00044472938525921113, "loss": 2.0068, "step": 2706 }, { "epoch": 0.55, "learning_rate": 0.000444402724330513, "loss": 2.0498, "step": 2707 }, { "epoch": 0.55, "learning_rate": 0.00044407608742768825, "loss": 2.0195, "step": 2708 }, { "epoch": 0.55, "learning_rate": 0.00044374947469188986, "loss": 2.0, "step": 2709 }, { "epoch": 0.55, "learning_rate": 0.0004434228862642605, "loss": 2.0469, "step": 2710 }, { "epoch": 0.55, "learning_rate": 0.0004430963222859329, "loss": 2.0664, "step": 2711 }, { "epoch": 0.55, "learning_rate": 0.0004427697828980286, "loss": 1.9941, "step": 2712 }, { "epoch": 0.55, "learning_rate": 0.00044244326824165853, "loss": 1.9844, "step": 2713 }, { "epoch": 0.55, "learning_rate": 0.00044211677845792344, "loss": 2.0781, "step": 2714 }, { "epoch": 0.55, "learning_rate": 0.00044179031368791295, "loss": 1.9814, "step": 2715 }, { "epoch": 0.55, "learning_rate": 0.0004414638740727058, "loss": 1.9609, "step": 2716 }, { "epoch": 0.55, "learning_rate": 0.00044113745975336995, "loss": 2.0049, "step": 2717 }, { "epoch": 0.55, "learning_rate": 0.0004408110708709628, "loss": 1.9463, "step": 2718 }, { "epoch": 0.55, "learning_rate": 0.0004404847075665302, "loss": 2.0098, "step": 2719 }, { "epoch": 0.55, "learning_rate": 0.000440158369981107, "loss": 2.0029, "step": 2720 }, { "epoch": 0.55, "learning_rate": 0.00043983205825571744, "loss": 1.9805, "step": 2721 }, { "epoch": 0.55, "learning_rate": 0.00043950577253137424, "loss": 1.917, "step": 2722 }, { "epoch": 0.55, "learning_rate": 0.0004391795129490786, "loss": 1.959, "step": 2723 }, { "epoch": 0.55, "learning_rate": 0.00043885327964982105, "loss": 1.9707, "step": 2724 }, { "epoch": 0.55, "learning_rate": 0.00043852707277458033, "loss": 2.0596, "step": 2725 }, { "epoch": 0.55, "learning_rate": 0.0004382008924643237, "loss": 2.0439, "step": 2726 }, { "epoch": 0.55, "learning_rate": 0.00043787473886000745, "loss": 2.0439, "step": 2727 }, { "epoch": 0.55, "learning_rate": 0.00043754861210257566, "loss": 1.9531, "step": 2728 }, { "epoch": 0.55, "learning_rate": 0.0004372225123329613, "loss": 1.9053, "step": 2729 }, { "epoch": 0.55, "learning_rate": 0.00043689643969208557, "loss": 1.8955, "step": 2730 }, { "epoch": 0.55, "learning_rate": 0.00043657039432085776, "loss": 2.0381, "step": 2731 }, { "epoch": 0.55, "learning_rate": 0.00043624437636017547, "loss": 2.0879, "step": 2732 }, { "epoch": 0.55, "learning_rate": 0.0004359183859509247, "loss": 1.9219, "step": 2733 }, { "epoch": 0.55, "learning_rate": 0.00043559242323397917, "loss": 2.0898, "step": 2734 }, { "epoch": 0.56, "learning_rate": 0.000435266488350201, "loss": 1.9912, "step": 2735 }, { "epoch": 0.56, "learning_rate": 0.0004349405814404396, "loss": 1.9746, "step": 2736 }, { "epoch": 0.56, "learning_rate": 0.0004346147026455334, "loss": 1.9229, "step": 2737 }, { "epoch": 0.56, "learning_rate": 0.00043428885210630757, "loss": 2.0049, "step": 2738 }, { "epoch": 0.56, "learning_rate": 0.0004339630299635756, "loss": 1.8623, "step": 2739 }, { "epoch": 0.56, "learning_rate": 0.00043363723635813907, "loss": 2.0234, "step": 2740 }, { "epoch": 0.56, "learning_rate": 0.0004333114714307864, "loss": 2.041, "step": 2741 }, { "epoch": 0.56, "learning_rate": 0.0004329857353222941, "loss": 1.9121, "step": 2742 }, { "epoch": 0.56, "learning_rate": 0.00043266002817342634, "loss": 2.001, "step": 2743 }, { "epoch": 0.56, "learning_rate": 0.0004323343501249346, "loss": 1.8916, "step": 2744 }, { "epoch": 0.56, "learning_rate": 0.0004320087013175574, "loss": 1.9375, "step": 2745 }, { "epoch": 0.56, "learning_rate": 0.00043168308189202136, "loss": 2.0684, "step": 2746 }, { "epoch": 0.56, "learning_rate": 0.0004313574919890401, "loss": 2.0176, "step": 2747 }, { "epoch": 0.56, "learning_rate": 0.0004310319317493141, "loss": 1.9336, "step": 2748 }, { "epoch": 0.56, "learning_rate": 0.0004307064013135316, "loss": 1.8398, "step": 2749 }, { "epoch": 0.56, "learning_rate": 0.0004303809008223678, "loss": 1.999, "step": 2750 }, { "epoch": 0.56, "learning_rate": 0.0004300554304164846, "loss": 2.0371, "step": 2751 }, { "epoch": 0.56, "learning_rate": 0.0004297299902365311, "loss": 2.0215, "step": 2752 }, { "epoch": 0.56, "learning_rate": 0.00042940458042314394, "loss": 1.9893, "step": 2753 }, { "epoch": 0.56, "learning_rate": 0.00042907920111694576, "loss": 1.9775, "step": 2754 }, { "epoch": 0.56, "learning_rate": 0.0004287538524585461, "loss": 1.9121, "step": 2755 }, { "epoch": 0.56, "learning_rate": 0.0004284285345885419, "loss": 2.0527, "step": 2756 }, { "epoch": 0.56, "learning_rate": 0.00042810324764751647, "loss": 1.8789, "step": 2757 }, { "epoch": 0.56, "learning_rate": 0.0004277779917760393, "loss": 1.9512, "step": 2758 }, { "epoch": 0.56, "learning_rate": 0.00042745276711466713, "loss": 1.9766, "step": 2759 }, { "epoch": 0.56, "learning_rate": 0.000427127573803943, "loss": 1.9678, "step": 2760 }, { "epoch": 0.56, "learning_rate": 0.0004268024119843961, "loss": 1.9346, "step": 2761 }, { "epoch": 0.56, "learning_rate": 0.00042647728179654235, "loss": 2.0225, "step": 2762 }, { "epoch": 0.56, "learning_rate": 0.00042615218338088405, "loss": 2.0156, "step": 2763 }, { "epoch": 0.56, "learning_rate": 0.0004258271168779093, "loss": 1.9775, "step": 2764 }, { "epoch": 0.56, "learning_rate": 0.0004255020824280931, "loss": 1.8975, "step": 2765 }, { "epoch": 0.56, "learning_rate": 0.000425177080171896, "loss": 2.0322, "step": 2766 }, { "epoch": 0.56, "learning_rate": 0.00042485211024976497, "loss": 2.0352, "step": 2767 }, { "epoch": 0.56, "learning_rate": 0.00042452717280213253, "loss": 1.9814, "step": 2768 }, { "epoch": 0.56, "learning_rate": 0.000424202267969418, "loss": 2.0215, "step": 2769 }, { "epoch": 0.56, "learning_rate": 0.0004238773958920261, "loss": 2.0156, "step": 2770 }, { "epoch": 0.56, "learning_rate": 0.00042355255671034715, "loss": 1.9297, "step": 2771 }, { "epoch": 0.56, "learning_rate": 0.0004232277505647579, "loss": 2.0635, "step": 2772 }, { "epoch": 0.56, "learning_rate": 0.00042290297759562013, "loss": 1.9502, "step": 2773 }, { "epoch": 0.56, "learning_rate": 0.0004225782379432817, "loss": 1.9131, "step": 2774 }, { "epoch": 0.56, "learning_rate": 0.0004222535317480762, "loss": 1.9404, "step": 2775 }, { "epoch": 0.56, "learning_rate": 0.00042192885915032235, "loss": 1.9258, "step": 2776 }, { "epoch": 0.56, "learning_rate": 0.0004216042202903245, "loss": 1.8545, "step": 2777 }, { "epoch": 0.56, "learning_rate": 0.00042127961530837276, "loss": 2.0303, "step": 2778 }, { "epoch": 0.56, "learning_rate": 0.0004209550443447421, "loss": 2.0293, "step": 2779 }, { "epoch": 0.56, "learning_rate": 0.00042063050753969296, "loss": 1.9043, "step": 2780 }, { "epoch": 0.56, "learning_rate": 0.0004203060050334713, "loss": 1.8926, "step": 2781 }, { "epoch": 0.56, "learning_rate": 0.00041998153696630794, "loss": 1.9824, "step": 2782 }, { "epoch": 0.56, "learning_rate": 0.0004196571034784186, "loss": 1.9121, "step": 2783 }, { "epoch": 0.57, "learning_rate": 0.0004193327047100046, "loss": 1.9795, "step": 2784 }, { "epoch": 0.57, "learning_rate": 0.00041900834080125214, "loss": 1.9521, "step": 2785 }, { "epoch": 0.57, "learning_rate": 0.00041868401189233207, "loss": 2.0361, "step": 2786 }, { "epoch": 0.57, "learning_rate": 0.00041835971812340014, "loss": 1.9424, "step": 2787 }, { "epoch": 0.57, "learning_rate": 0.00041803545963459734, "loss": 1.8418, "step": 2788 }, { "epoch": 0.57, "learning_rate": 0.00041771123656604906, "loss": 1.8945, "step": 2789 }, { "epoch": 0.57, "learning_rate": 0.00041738704905786505, "loss": 1.8672, "step": 2790 }, { "epoch": 0.57, "learning_rate": 0.00041706289725014056, "loss": 1.9248, "step": 2791 }, { "epoch": 0.57, "learning_rate": 0.0004167387812829549, "loss": 1.957, "step": 2792 }, { "epoch": 0.57, "learning_rate": 0.00041641470129637155, "loss": 1.9443, "step": 2793 }, { "epoch": 0.57, "learning_rate": 0.00041609065743043917, "loss": 2.0312, "step": 2794 }, { "epoch": 0.57, "learning_rate": 0.00041576664982519054, "loss": 1.9512, "step": 2795 }, { "epoch": 0.57, "learning_rate": 0.0004154426786206423, "loss": 2.0059, "step": 2796 }, { "epoch": 0.57, "learning_rate": 0.00041511874395679603, "loss": 2.0195, "step": 2797 }, { "epoch": 0.57, "learning_rate": 0.00041479484597363735, "loss": 1.9697, "step": 2798 }, { "epoch": 0.57, "learning_rate": 0.00041447098481113553, "loss": 1.9033, "step": 2799 }, { "epoch": 0.57, "learning_rate": 0.00041414716060924483, "loss": 1.9404, "step": 2800 }, { "epoch": 0.57, "learning_rate": 0.00041382337350790257, "loss": 1.9648, "step": 2801 }, { "epoch": 0.57, "learning_rate": 0.00041349962364703083, "loss": 1.9795, "step": 2802 }, { "epoch": 0.57, "learning_rate": 0.00041317591116653486, "loss": 2.0967, "step": 2803 }, { "epoch": 0.57, "learning_rate": 0.0004128522362063045, "loss": 1.9551, "step": 2804 }, { "epoch": 0.57, "learning_rate": 0.0004125285989062131, "loss": 2.0049, "step": 2805 }, { "epoch": 0.57, "learning_rate": 0.00041220499940611727, "loss": 1.9043, "step": 2806 }, { "epoch": 0.57, "learning_rate": 0.00041188143784585816, "loss": 2.0156, "step": 2807 }, { "epoch": 0.57, "learning_rate": 0.00041155791436525967, "loss": 2.001, "step": 2808 }, { "epoch": 0.57, "learning_rate": 0.0004112344291041297, "loss": 1.9727, "step": 2809 }, { "epoch": 0.57, "learning_rate": 0.0004109109822022598, "loss": 2.0518, "step": 2810 }, { "epoch": 0.57, "learning_rate": 0.0004105875737994244, "loss": 1.9961, "step": 2811 }, { "epoch": 0.57, "learning_rate": 0.00041026420403538166, "loss": 1.9365, "step": 2812 }, { "epoch": 0.57, "learning_rate": 0.00040994087304987325, "loss": 1.9863, "step": 2813 }, { "epoch": 0.57, "learning_rate": 0.0004096175809826235, "loss": 1.9189, "step": 2814 }, { "epoch": 0.57, "learning_rate": 0.0004092943279733402, "loss": 1.9824, "step": 2815 }, { "epoch": 0.57, "learning_rate": 0.00040897111416171464, "loss": 1.9092, "step": 2816 }, { "epoch": 0.57, "learning_rate": 0.00040864793968742053, "loss": 2.0059, "step": 2817 }, { "epoch": 0.57, "learning_rate": 0.0004083248046901149, "loss": 1.8975, "step": 2818 }, { "epoch": 0.57, "learning_rate": 0.00040800170930943764, "loss": 1.9658, "step": 2819 }, { "epoch": 0.57, "learning_rate": 0.0004076786536850119, "loss": 1.8955, "step": 2820 }, { "epoch": 0.57, "learning_rate": 0.0004073556379564429, "loss": 1.8838, "step": 2821 }, { "epoch": 0.57, "learning_rate": 0.0004070326622633192, "loss": 2.0234, "step": 2822 }, { "epoch": 0.57, "learning_rate": 0.00040670972674521213, "loss": 1.9531, "step": 2823 }, { "epoch": 0.57, "learning_rate": 0.00040638683154167515, "loss": 1.9766, "step": 2824 }, { "epoch": 0.57, "learning_rate": 0.00040606397679224455, "loss": 1.9834, "step": 2825 }, { "epoch": 0.57, "learning_rate": 0.0004057411626364395, "loss": 1.9883, "step": 2826 }, { "epoch": 0.57, "learning_rate": 0.0004054183892137611, "loss": 1.8818, "step": 2827 }, { "epoch": 0.57, "learning_rate": 0.0004050956566636928, "loss": 1.8994, "step": 2828 }, { "epoch": 0.57, "learning_rate": 0.00040477296512570095, "loss": 1.9111, "step": 2829 }, { "epoch": 0.57, "learning_rate": 0.0004044503147392339, "loss": 1.9688, "step": 2830 }, { "epoch": 0.57, "learning_rate": 0.0004041277056437218, "loss": 2.0293, "step": 2831 }, { "epoch": 0.57, "learning_rate": 0.00040380513797857765, "loss": 1.9307, "step": 2832 }, { "epoch": 0.57, "learning_rate": 0.0004034826118831963, "loss": 1.8916, "step": 2833 }, { "epoch": 0.58, "learning_rate": 0.0004031601274969542, "loss": 1.9941, "step": 2834 }, { "epoch": 0.58, "learning_rate": 0.00040283768495921046, "loss": 1.9355, "step": 2835 }, { "epoch": 0.58, "learning_rate": 0.0004025152844093056, "loss": 2.0264, "step": 2836 }, { "epoch": 0.58, "learning_rate": 0.00040219292598656244, "loss": 2.0029, "step": 2837 }, { "epoch": 0.58, "learning_rate": 0.00040187060983028477, "loss": 1.9736, "step": 2838 }, { "epoch": 0.58, "learning_rate": 0.0004015483360797593, "loss": 2.001, "step": 2839 }, { "epoch": 0.58, "learning_rate": 0.0004012261048742536, "loss": 2.0254, "step": 2840 }, { "epoch": 0.58, "learning_rate": 0.0004009039163530167, "loss": 1.9893, "step": 2841 }, { "epoch": 0.58, "learning_rate": 0.00040058177065528, "loss": 2.0234, "step": 2842 }, { "epoch": 0.58, "learning_rate": 0.00040025966792025586, "loss": 2.0361, "step": 2843 }, { "epoch": 0.58, "learning_rate": 0.00039993760828713784, "loss": 1.8809, "step": 2844 }, { "epoch": 0.58, "learning_rate": 0.00039961559189510154, "loss": 2.0312, "step": 2845 }, { "epoch": 0.58, "learning_rate": 0.00039929361888330317, "loss": 1.9844, "step": 2846 }, { "epoch": 0.58, "learning_rate": 0.00039897168939088067, "loss": 1.9336, "step": 2847 }, { "epoch": 0.58, "learning_rate": 0.00039864980355695323, "loss": 1.8994, "step": 2848 }, { "epoch": 0.58, "learning_rate": 0.0003983279615206208, "loss": 1.9521, "step": 2849 }, { "epoch": 0.58, "learning_rate": 0.0003980061634209643, "loss": 2.0059, "step": 2850 }, { "epoch": 0.58, "learning_rate": 0.00039768440939704663, "loss": 2.002, "step": 2851 }, { "epoch": 0.58, "learning_rate": 0.0003973626995879105, "loss": 1.9365, "step": 2852 }, { "epoch": 0.58, "learning_rate": 0.0003970410341325799, "loss": 1.9609, "step": 2853 }, { "epoch": 0.58, "learning_rate": 0.00039671941317005976, "loss": 2.0361, "step": 2854 }, { "epoch": 0.58, "learning_rate": 0.0003963978368393361, "loss": 1.9775, "step": 2855 }, { "epoch": 0.58, "learning_rate": 0.000396076305279375, "loss": 1.9531, "step": 2856 }, { "epoch": 0.58, "learning_rate": 0.0003957548186291233, "loss": 1.9609, "step": 2857 }, { "epoch": 0.58, "learning_rate": 0.00039543337702750914, "loss": 1.9053, "step": 2858 }, { "epoch": 0.58, "learning_rate": 0.00039511198061344035, "loss": 2.0459, "step": 2859 }, { "epoch": 0.58, "learning_rate": 0.0003947906295258054, "loss": 1.9404, "step": 2860 }, { "epoch": 0.58, "learning_rate": 0.00039446932390347386, "loss": 1.9482, "step": 2861 }, { "epoch": 0.58, "learning_rate": 0.00039414806388529477, "loss": 1.9463, "step": 2862 }, { "epoch": 0.58, "learning_rate": 0.0003938268496100976, "loss": 1.8555, "step": 2863 }, { "epoch": 0.58, "learning_rate": 0.00039350568121669275, "loss": 1.9072, "step": 2864 }, { "epoch": 0.58, "learning_rate": 0.0003931845588438701, "loss": 1.9307, "step": 2865 }, { "epoch": 0.58, "learning_rate": 0.00039286348263039975, "loss": 2.0732, "step": 2866 }, { "epoch": 0.58, "learning_rate": 0.0003925424527150321, "loss": 2.0098, "step": 2867 }, { "epoch": 0.58, "learning_rate": 0.0003922214692364974, "loss": 1.9736, "step": 2868 }, { "epoch": 0.58, "learning_rate": 0.00039190053233350577, "loss": 2.0039, "step": 2869 }, { "epoch": 0.58, "learning_rate": 0.0003915796421447472, "loss": 1.9395, "step": 2870 }, { "epoch": 0.58, "learning_rate": 0.0003912587988088918, "loss": 1.9893, "step": 2871 }, { "epoch": 0.58, "learning_rate": 0.0003909380024645891, "loss": 1.9736, "step": 2872 }, { "epoch": 0.58, "learning_rate": 0.0003906172532504681, "loss": 2.0049, "step": 2873 }, { "epoch": 0.58, "learning_rate": 0.0003902965513051382, "loss": 2.0938, "step": 2874 }, { "epoch": 0.58, "learning_rate": 0.00038997589676718783, "loss": 1.9072, "step": 2875 }, { "epoch": 0.58, "learning_rate": 0.0003896552897751846, "loss": 2.0488, "step": 2876 }, { "epoch": 0.58, "learning_rate": 0.00038933473046767655, "loss": 1.9727, "step": 2877 }, { "epoch": 0.58, "learning_rate": 0.0003890142189831904, "loss": 1.9395, "step": 2878 }, { "epoch": 0.58, "learning_rate": 0.00038869375546023204, "loss": 1.8867, "step": 2879 }, { "epoch": 0.58, "learning_rate": 0.0003883733400372875, "loss": 1.9971, "step": 2880 }, { "epoch": 0.58, "learning_rate": 0.0003880529728528211, "loss": 1.9824, "step": 2881 }, { "epoch": 0.58, "learning_rate": 0.0003877326540452767, "loss": 2.0391, "step": 2882 }, { "epoch": 0.59, "learning_rate": 0.0003874123837530775, "loss": 1.9521, "step": 2883 }, { "epoch": 0.59, "learning_rate": 0.00038709216211462534, "loss": 1.9473, "step": 2884 }, { "epoch": 0.59, "learning_rate": 0.0003867719892683013, "loss": 1.9844, "step": 2885 }, { "epoch": 0.59, "learning_rate": 0.0003864518653524648, "loss": 1.9893, "step": 2886 }, { "epoch": 0.59, "learning_rate": 0.0003861317905054551, "loss": 1.8955, "step": 2887 }, { "epoch": 0.59, "learning_rate": 0.00038581176486558966, "loss": 1.9521, "step": 2888 }, { "epoch": 0.59, "learning_rate": 0.0003854917885711644, "loss": 2.0098, "step": 2889 }, { "epoch": 0.59, "learning_rate": 0.0003851718617604547, "loss": 2.043, "step": 2890 }, { "epoch": 0.59, "learning_rate": 0.0003848519845717138, "loss": 1.9307, "step": 2891 }, { "epoch": 0.59, "learning_rate": 0.0003845321571431737, "loss": 2.0244, "step": 2892 }, { "epoch": 0.59, "learning_rate": 0.0003842123796130455, "loss": 1.9473, "step": 2893 }, { "epoch": 0.59, "learning_rate": 0.0003838926521195178, "loss": 1.9531, "step": 2894 }, { "epoch": 0.59, "learning_rate": 0.0003835729748007582, "loss": 1.9707, "step": 2895 }, { "epoch": 0.59, "learning_rate": 0.00038325334779491256, "loss": 1.9746, "step": 2896 }, { "epoch": 0.59, "learning_rate": 0.0003829337712401048, "loss": 1.9844, "step": 2897 }, { "epoch": 0.59, "learning_rate": 0.0003826142452744367, "loss": 1.8311, "step": 2898 }, { "epoch": 0.59, "learning_rate": 0.0003822947700359891, "loss": 1.9658, "step": 2899 }, { "epoch": 0.59, "learning_rate": 0.0003819753456628204, "loss": 2.0146, "step": 2900 }, { "epoch": 0.59, "learning_rate": 0.0003816559722929667, "loss": 1.9619, "step": 2901 }, { "epoch": 0.59, "learning_rate": 0.00038133665006444257, "loss": 1.9072, "step": 2902 }, { "epoch": 0.59, "learning_rate": 0.0003810173791152405, "loss": 1.9561, "step": 2903 }, { "epoch": 0.59, "learning_rate": 0.0003806981595833303, "loss": 1.8965, "step": 2904 }, { "epoch": 0.59, "learning_rate": 0.00038037899160665986, "loss": 1.9277, "step": 2905 }, { "epoch": 0.59, "learning_rate": 0.00038005987532315513, "loss": 1.9297, "step": 2906 }, { "epoch": 0.59, "learning_rate": 0.00037974081087071925, "loss": 2.0049, "step": 2907 }, { "epoch": 0.59, "learning_rate": 0.00037942179838723296, "loss": 1.9756, "step": 2908 }, { "epoch": 0.59, "learning_rate": 0.00037910283801055486, "loss": 1.9609, "step": 2909 }, { "epoch": 0.59, "learning_rate": 0.00037878392987852097, "loss": 1.9951, "step": 2910 }, { "epoch": 0.59, "learning_rate": 0.0003784650741289442, "loss": 1.9971, "step": 2911 }, { "epoch": 0.59, "learning_rate": 0.0003781462708996157, "loss": 1.9277, "step": 2912 }, { "epoch": 0.59, "learning_rate": 0.00037782752032830346, "loss": 1.9404, "step": 2913 }, { "epoch": 0.59, "learning_rate": 0.00037750882255275245, "loss": 1.9482, "step": 2914 }, { "epoch": 0.59, "learning_rate": 0.00037719017771068537, "loss": 1.9512, "step": 2915 }, { "epoch": 0.59, "learning_rate": 0.0003768715859398016, "loss": 1.9072, "step": 2916 }, { "epoch": 0.59, "learning_rate": 0.0003765530473777777, "loss": 2.0596, "step": 2917 }, { "epoch": 0.59, "learning_rate": 0.0003762345621622677, "loss": 1.8887, "step": 2918 }, { "epoch": 0.59, "learning_rate": 0.00037591613043090177, "loss": 1.9492, "step": 2919 }, { "epoch": 0.59, "learning_rate": 0.00037559775232128765, "loss": 1.9688, "step": 2920 }, { "epoch": 0.59, "learning_rate": 0.0003752794279710094, "loss": 1.9482, "step": 2921 }, { "epoch": 0.59, "learning_rate": 0.0003749611575176282, "loss": 1.9922, "step": 2922 }, { "epoch": 0.59, "learning_rate": 0.000374642941098682, "loss": 2.001, "step": 2923 }, { "epoch": 0.59, "learning_rate": 0.0003743247788516848, "loss": 1.9033, "step": 2924 }, { "epoch": 0.59, "learning_rate": 0.0003740066709141281, "loss": 2.0664, "step": 2925 }, { "epoch": 0.59, "learning_rate": 0.00037368861742347916, "loss": 1.9805, "step": 2926 }, { "epoch": 0.59, "learning_rate": 0.0003733706185171819, "loss": 1.9072, "step": 2927 }, { "epoch": 0.59, "learning_rate": 0.0003730526743326571, "loss": 1.9502, "step": 2928 }, { "epoch": 0.59, "learning_rate": 0.0003727347850073012, "loss": 1.916, "step": 2929 }, { "epoch": 0.59, "learning_rate": 0.00037241695067848736, "loss": 1.9717, "step": 2930 }, { "epoch": 0.59, "learning_rate": 0.0003720991714835651, "loss": 1.917, "step": 2931 }, { "epoch": 0.6, "learning_rate": 0.0003717814475598597, "loss": 2.0, "step": 2932 }, { "epoch": 0.6, "learning_rate": 0.0003714637790446726, "loss": 1.9033, "step": 2933 }, { "epoch": 0.6, "learning_rate": 0.0003711461660752819, "loss": 1.874, "step": 2934 }, { "epoch": 0.6, "learning_rate": 0.000370828608788941, "loss": 1.96, "step": 2935 }, { "epoch": 0.6, "learning_rate": 0.00037051110732287933, "loss": 1.959, "step": 2936 }, { "epoch": 0.6, "learning_rate": 0.00037019366181430247, "loss": 1.8506, "step": 2937 }, { "epoch": 0.6, "learning_rate": 0.0003698762724003919, "loss": 1.9385, "step": 2938 }, { "epoch": 0.6, "learning_rate": 0.0003695589392183043, "loss": 1.8965, "step": 2939 }, { "epoch": 0.6, "learning_rate": 0.0003692416624051727, "loss": 1.8291, "step": 2940 }, { "epoch": 0.6, "learning_rate": 0.0003689244420981054, "loss": 1.9014, "step": 2941 }, { "epoch": 0.6, "learning_rate": 0.0003686072784341864, "loss": 1.9102, "step": 2942 }, { "epoch": 0.6, "learning_rate": 0.00036829017155047496, "loss": 1.9756, "step": 2943 }, { "epoch": 0.6, "learning_rate": 0.0003679731215840063, "loss": 1.9297, "step": 2944 }, { "epoch": 0.6, "learning_rate": 0.0003676561286717908, "loss": 1.8652, "step": 2945 }, { "epoch": 0.6, "learning_rate": 0.00036733919295081375, "loss": 1.9697, "step": 2946 }, { "epoch": 0.6, "learning_rate": 0.00036702231455803657, "loss": 1.957, "step": 2947 }, { "epoch": 0.6, "learning_rate": 0.0003667054936303954, "loss": 1.9062, "step": 2948 }, { "epoch": 0.6, "learning_rate": 0.00036638873030480127, "loss": 2.04, "step": 2949 }, { "epoch": 0.6, "learning_rate": 0.00036607202471814106, "loss": 1.8906, "step": 2950 }, { "epoch": 0.6, "learning_rate": 0.00036575537700727625, "loss": 1.9287, "step": 2951 }, { "epoch": 0.6, "learning_rate": 0.000365438787309043, "loss": 1.9697, "step": 2952 }, { "epoch": 0.6, "learning_rate": 0.00036512225576025326, "loss": 2.0273, "step": 2953 }, { "epoch": 0.6, "learning_rate": 0.00036480578249769305, "loss": 1.916, "step": 2954 }, { "epoch": 0.6, "learning_rate": 0.0003644893676581237, "loss": 1.998, "step": 2955 }, { "epoch": 0.6, "learning_rate": 0.00036417301137828067, "loss": 2.0264, "step": 2956 }, { "epoch": 0.6, "learning_rate": 0.00036385671379487495, "loss": 1.9668, "step": 2957 }, { "epoch": 0.6, "learning_rate": 0.0003635404750445918, "loss": 1.9238, "step": 2958 }, { "epoch": 0.6, "learning_rate": 0.0003632242952640906, "loss": 2.0293, "step": 2959 }, { "epoch": 0.6, "learning_rate": 0.0003629081745900062, "loss": 1.8926, "step": 2960 }, { "epoch": 0.6, "learning_rate": 0.0003625921131589469, "loss": 1.9453, "step": 2961 }, { "epoch": 0.6, "learning_rate": 0.00036227611110749595, "loss": 1.9082, "step": 2962 }, { "epoch": 0.6, "learning_rate": 0.00036196016857221113, "loss": 1.8994, "step": 2963 }, { "epoch": 0.6, "learning_rate": 0.000361644285689624, "loss": 1.96, "step": 2964 }, { "epoch": 0.6, "learning_rate": 0.0003613284625962405, "loss": 2.0498, "step": 2965 }, { "epoch": 0.6, "learning_rate": 0.0003610126994285411, "loss": 2.0303, "step": 2966 }, { "epoch": 0.6, "learning_rate": 0.00036069699632297977, "loss": 1.9336, "step": 2967 }, { "epoch": 0.6, "learning_rate": 0.00036038135341598485, "loss": 1.9756, "step": 2968 }, { "epoch": 0.6, "learning_rate": 0.0003600657708439589, "loss": 1.8848, "step": 2969 }, { "epoch": 0.6, "learning_rate": 0.00035975024874327797, "loss": 2.0605, "step": 2970 }, { "epoch": 0.6, "learning_rate": 0.0003594347872502921, "loss": 1.9443, "step": 2971 }, { "epoch": 0.6, "learning_rate": 0.0003591193865013251, "loss": 1.958, "step": 2972 }, { "epoch": 0.6, "learning_rate": 0.00035880404663267515, "loss": 1.96, "step": 2973 }, { "epoch": 0.6, "learning_rate": 0.000358488767780613, "loss": 1.9795, "step": 2974 }, { "epoch": 0.6, "learning_rate": 0.00035817355008138386, "loss": 1.9512, "step": 2975 }, { "epoch": 0.6, "learning_rate": 0.00035785839367120643, "loss": 2.0078, "step": 2976 }, { "epoch": 0.6, "learning_rate": 0.00035754329868627254, "loss": 1.9883, "step": 2977 }, { "epoch": 0.6, "learning_rate": 0.0003572282652627477, "loss": 2.0137, "step": 2978 }, { "epoch": 0.6, "learning_rate": 0.00035691329353677116, "loss": 1.9551, "step": 2979 }, { "epoch": 0.6, "learning_rate": 0.00035659838364445503, "loss": 2.0137, "step": 2980 }, { "epoch": 0.61, "learning_rate": 0.0003562835357218845, "loss": 1.9004, "step": 2981 }, { "epoch": 0.61, "learning_rate": 0.00035596874990511876, "loss": 1.9355, "step": 2982 }, { "epoch": 0.61, "learning_rate": 0.0003556540263301896, "loss": 1.9521, "step": 2983 }, { "epoch": 0.61, "learning_rate": 0.00035533936513310185, "loss": 1.834, "step": 2984 }, { "epoch": 0.61, "learning_rate": 0.00035502476644983384, "loss": 1.9629, "step": 2985 }, { "epoch": 0.61, "learning_rate": 0.00035471023041633667, "loss": 2.0791, "step": 2986 }, { "epoch": 0.61, "learning_rate": 0.0003543957571685341, "loss": 1.9043, "step": 2987 }, { "epoch": 0.61, "learning_rate": 0.0003540813468423229, "loss": 1.96, "step": 2988 }, { "epoch": 0.61, "learning_rate": 0.00035376699957357293, "loss": 2.0117, "step": 2989 }, { "epoch": 0.61, "learning_rate": 0.0003534527154981267, "loss": 1.9971, "step": 2990 }, { "epoch": 0.61, "learning_rate": 0.0003531384947517989, "loss": 1.9678, "step": 2991 }, { "epoch": 0.61, "learning_rate": 0.00035282433747037755, "loss": 1.8291, "step": 2992 }, { "epoch": 0.61, "learning_rate": 0.0003525102437896231, "loss": 1.9473, "step": 2993 }, { "epoch": 0.61, "learning_rate": 0.0003521962138452679, "loss": 2.002, "step": 2994 }, { "epoch": 0.61, "learning_rate": 0.00035188224777301763, "loss": 1.9521, "step": 2995 }, { "epoch": 0.61, "learning_rate": 0.0003515683457085499, "loss": 1.9482, "step": 2996 }, { "epoch": 0.61, "learning_rate": 0.0003512545077875145, "loss": 1.917, "step": 2997 }, { "epoch": 0.61, "learning_rate": 0.0003509407341455341, "loss": 1.9102, "step": 2998 }, { "epoch": 0.61, "learning_rate": 0.0003506270249182028, "loss": 1.9844, "step": 2999 }, { "epoch": 0.61, "learning_rate": 0.00035031338024108747, "loss": 1.9014, "step": 3000 }, { "epoch": 0.61, "learning_rate": 0.000349999800249727, "loss": 1.9805, "step": 3001 }, { "epoch": 0.61, "learning_rate": 0.00034968628507963214, "loss": 1.9336, "step": 3002 }, { "epoch": 0.61, "learning_rate": 0.0003493728348662855, "loss": 1.9121, "step": 3003 }, { "epoch": 0.61, "learning_rate": 0.00034905944974514226, "loss": 1.9316, "step": 3004 }, { "epoch": 0.61, "learning_rate": 0.0003487461298516288, "loss": 1.9375, "step": 3005 }, { "epoch": 0.61, "learning_rate": 0.0003484328753211435, "loss": 1.915, "step": 3006 }, { "epoch": 0.61, "learning_rate": 0.00034811968628905643, "loss": 2.0566, "step": 3007 }, { "epoch": 0.61, "learning_rate": 0.0003478065628907099, "loss": 2.0479, "step": 3008 }, { "epoch": 0.61, "learning_rate": 0.00034749350526141706, "loss": 2.0234, "step": 3009 }, { "epoch": 0.61, "learning_rate": 0.000347180513536463, "loss": 1.9297, "step": 3010 }, { "epoch": 0.61, "learning_rate": 0.0003468675878511047, "loss": 1.9336, "step": 3011 }, { "epoch": 0.61, "learning_rate": 0.0003465547283405699, "loss": 1.9375, "step": 3012 }, { "epoch": 0.61, "learning_rate": 0.00034624193514005806, "loss": 1.9102, "step": 3013 }, { "epoch": 0.61, "learning_rate": 0.0003459292083847404, "loss": 1.8984, "step": 3014 }, { "epoch": 0.61, "learning_rate": 0.0003456165482097587, "loss": 1.8281, "step": 3015 }, { "epoch": 0.61, "learning_rate": 0.00034530395475022613, "loss": 1.9619, "step": 3016 }, { "epoch": 0.61, "learning_rate": 0.00034499142814122766, "loss": 1.9365, "step": 3017 }, { "epoch": 0.61, "learning_rate": 0.00034467896851781863, "loss": 1.9619, "step": 3018 }, { "epoch": 0.61, "learning_rate": 0.00034436657601502575, "loss": 1.9922, "step": 3019 }, { "epoch": 0.61, "learning_rate": 0.0003440542507678467, "loss": 1.9893, "step": 3020 }, { "epoch": 0.61, "learning_rate": 0.0003437419929112503, "loss": 1.9268, "step": 3021 }, { "epoch": 0.61, "learning_rate": 0.00034342980258017573, "loss": 1.9854, "step": 3022 }, { "epoch": 0.61, "learning_rate": 0.00034311767990953323, "loss": 1.9424, "step": 3023 }, { "epoch": 0.61, "learning_rate": 0.0003428056250342042, "loss": 2.0234, "step": 3024 }, { "epoch": 0.61, "learning_rate": 0.0003424936380890403, "loss": 1.9336, "step": 3025 }, { "epoch": 0.61, "learning_rate": 0.00034218171920886363, "loss": 1.9619, "step": 3026 }, { "epoch": 0.61, "learning_rate": 0.0003418698685284676, "loss": 1.874, "step": 3027 }, { "epoch": 0.61, "learning_rate": 0.0003415580861826156, "loss": 2.0596, "step": 3028 }, { "epoch": 0.61, "learning_rate": 0.0003412463723060415, "loss": 2.0312, "step": 3029 }, { "epoch": 0.61, "learning_rate": 0.00034093472703344987, "loss": 1.8945, "step": 3030 }, { "epoch": 0.62, "learning_rate": 0.0003406231504995155, "loss": 2.0049, "step": 3031 }, { "epoch": 0.62, "learning_rate": 0.0003403116428388832, "loss": 1.9336, "step": 3032 }, { "epoch": 0.62, "learning_rate": 0.0003400002041861685, "loss": 1.9434, "step": 3033 }, { "epoch": 0.62, "learning_rate": 0.00033968883467595676, "loss": 1.8623, "step": 3034 }, { "epoch": 0.62, "learning_rate": 0.0003393775344428035, "loss": 2.0156, "step": 3035 }, { "epoch": 0.62, "learning_rate": 0.00033906630362123466, "loss": 1.9443, "step": 3036 }, { "epoch": 0.62, "learning_rate": 0.00033875514234574553, "loss": 2.001, "step": 3037 }, { "epoch": 0.62, "learning_rate": 0.0003384440507508022, "loss": 1.9639, "step": 3038 }, { "epoch": 0.62, "learning_rate": 0.0003381330289708395, "loss": 1.9609, "step": 3039 }, { "epoch": 0.62, "learning_rate": 0.00033782207714026324, "loss": 1.8701, "step": 3040 }, { "epoch": 0.62, "learning_rate": 0.0003375111953934485, "loss": 2.0059, "step": 3041 }, { "epoch": 0.62, "learning_rate": 0.0003372003838647398, "loss": 1.9541, "step": 3042 }, { "epoch": 0.62, "learning_rate": 0.00033688964268845204, "loss": 1.9082, "step": 3043 }, { "epoch": 0.62, "learning_rate": 0.0003365789719988689, "loss": 1.916, "step": 3044 }, { "epoch": 0.62, "learning_rate": 0.00033626837193024395, "loss": 1.8643, "step": 3045 }, { "epoch": 0.62, "learning_rate": 0.0003359578426168007, "loss": 1.9746, "step": 3046 }, { "epoch": 0.62, "learning_rate": 0.0003356473841927313, "loss": 1.9297, "step": 3047 }, { "epoch": 0.62, "learning_rate": 0.0003353369967921976, "loss": 1.9795, "step": 3048 }, { "epoch": 0.62, "learning_rate": 0.00033502668054933107, "loss": 1.9932, "step": 3049 }, { "epoch": 0.62, "learning_rate": 0.00033471643559823206, "loss": 1.9082, "step": 3050 }, { "epoch": 0.62, "learning_rate": 0.00033440626207296986, "loss": 1.7656, "step": 3051 }, { "epoch": 0.62, "learning_rate": 0.00033409616010758353, "loss": 2.0488, "step": 3052 }, { "epoch": 0.62, "learning_rate": 0.00033378612983608105, "loss": 1.9609, "step": 3053 }, { "epoch": 0.62, "learning_rate": 0.0003334761713924388, "loss": 1.9746, "step": 3054 }, { "epoch": 0.62, "learning_rate": 0.00033316628491060295, "loss": 1.8984, "step": 3055 }, { "epoch": 0.62, "learning_rate": 0.0003328564705244883, "loss": 1.9336, "step": 3056 }, { "epoch": 0.62, "learning_rate": 0.0003325467283679781, "loss": 1.9893, "step": 3057 }, { "epoch": 0.62, "learning_rate": 0.00033223705857492473, "loss": 1.8486, "step": 3058 }, { "epoch": 0.62, "learning_rate": 0.0003319274612791496, "loss": 1.9199, "step": 3059 }, { "epoch": 0.62, "learning_rate": 0.00033161793661444223, "loss": 1.9834, "step": 3060 }, { "epoch": 0.62, "learning_rate": 0.00033130848471456075, "loss": 1.9639, "step": 3061 }, { "epoch": 0.62, "learning_rate": 0.0003309991057132324, "loss": 1.8223, "step": 3062 }, { "epoch": 0.62, "learning_rate": 0.0003306897997441526, "loss": 2.0068, "step": 3063 }, { "epoch": 0.62, "learning_rate": 0.00033038056694098485, "loss": 1.9941, "step": 3064 }, { "epoch": 0.62, "learning_rate": 0.00033007140743736177, "loss": 1.9297, "step": 3065 }, { "epoch": 0.62, "learning_rate": 0.0003297623213668839, "loss": 1.9297, "step": 3066 }, { "epoch": 0.62, "learning_rate": 0.00032945330886311966, "loss": 1.9766, "step": 3067 }, { "epoch": 0.62, "learning_rate": 0.00032914437005960657, "loss": 2.0518, "step": 3068 }, { "epoch": 0.62, "learning_rate": 0.0003288355050898495, "loss": 1.9619, "step": 3069 }, { "epoch": 0.62, "learning_rate": 0.00032852671408732176, "loss": 1.9209, "step": 3070 }, { "epoch": 0.62, "learning_rate": 0.00032821799718546496, "loss": 1.9941, "step": 3071 }, { "epoch": 0.62, "learning_rate": 0.00032790935451768797, "loss": 1.8916, "step": 3072 }, { "epoch": 0.62, "learning_rate": 0.00032760078621736833, "loss": 1.9941, "step": 3073 }, { "epoch": 0.62, "learning_rate": 0.00032729229241785086, "loss": 1.9082, "step": 3074 }, { "epoch": 0.62, "learning_rate": 0.0003269838732524485, "loss": 2.0098, "step": 3075 }, { "epoch": 0.62, "learning_rate": 0.0003266755288544422, "loss": 2.0322, "step": 3076 }, { "epoch": 0.62, "learning_rate": 0.0003263672593570797, "loss": 1.9268, "step": 3077 }, { "epoch": 0.62, "learning_rate": 0.0003260590648935775, "loss": 1.9199, "step": 3078 }, { "epoch": 0.62, "learning_rate": 0.00032575094559711876, "loss": 1.8594, "step": 3079 }, { "epoch": 0.63, "learning_rate": 0.00032544290160085453, "loss": 2.04, "step": 3080 }, { "epoch": 0.63, "learning_rate": 0.0003251349330379037, "loss": 1.9619, "step": 3081 }, { "epoch": 0.63, "learning_rate": 0.00032482704004135177, "loss": 1.8682, "step": 3082 }, { "epoch": 0.63, "learning_rate": 0.0003245192227442519, "loss": 2.0078, "step": 3083 }, { "epoch": 0.63, "learning_rate": 0.0003242114812796252, "loss": 1.9697, "step": 3084 }, { "epoch": 0.63, "learning_rate": 0.0003239038157804589, "loss": 1.9043, "step": 3085 }, { "epoch": 0.63, "learning_rate": 0.00032359622637970805, "loss": 2.0049, "step": 3086 }, { "epoch": 0.63, "learning_rate": 0.00032328871321029494, "loss": 1.9941, "step": 3087 }, { "epoch": 0.63, "learning_rate": 0.00032298127640510845, "loss": 1.9053, "step": 3088 }, { "epoch": 0.63, "learning_rate": 0.00032267391609700467, "loss": 1.8271, "step": 3089 }, { "epoch": 0.63, "learning_rate": 0.0003223666324188067, "loss": 1.9736, "step": 3090 }, { "epoch": 0.63, "learning_rate": 0.0003220594255033046, "loss": 1.9824, "step": 3091 }, { "epoch": 0.63, "learning_rate": 0.00032175229548325496, "loss": 1.9805, "step": 3092 }, { "epoch": 0.63, "learning_rate": 0.0003214452424913813, "loss": 1.9551, "step": 3093 }, { "epoch": 0.63, "learning_rate": 0.0003211382666603741, "loss": 1.999, "step": 3094 }, { "epoch": 0.63, "learning_rate": 0.0003208313681228902, "loss": 1.9248, "step": 3095 }, { "epoch": 0.63, "learning_rate": 0.0003205245470115527, "loss": 2.043, "step": 3096 }, { "epoch": 0.63, "learning_rate": 0.0003202178034589521, "loss": 1.8545, "step": 3097 }, { "epoch": 0.63, "learning_rate": 0.0003199111375976449, "loss": 2.0273, "step": 3098 }, { "epoch": 0.63, "learning_rate": 0.0003196045495601537, "loss": 1.9453, "step": 3099 }, { "epoch": 0.63, "learning_rate": 0.0003192980394789682, "loss": 1.8779, "step": 3100 }, { "epoch": 0.63, "learning_rate": 0.000318991607486544, "loss": 1.9912, "step": 3101 }, { "epoch": 0.63, "learning_rate": 0.0003186852537153026, "loss": 1.9541, "step": 3102 }, { "epoch": 0.63, "learning_rate": 0.00031837897829763254, "loss": 1.9824, "step": 3103 }, { "epoch": 0.63, "learning_rate": 0.0003180727813658879, "loss": 1.9551, "step": 3104 }, { "epoch": 0.63, "learning_rate": 0.0003177666630523888, "loss": 1.8848, "step": 3105 }, { "epoch": 0.63, "learning_rate": 0.00031746062348942186, "loss": 1.8906, "step": 3106 }, { "epoch": 0.63, "learning_rate": 0.00031715466280923924, "loss": 1.9424, "step": 3107 }, { "epoch": 0.63, "learning_rate": 0.0003168487811440591, "loss": 1.9658, "step": 3108 }, { "epoch": 0.63, "learning_rate": 0.0003165429786260655, "loss": 1.9346, "step": 3109 }, { "epoch": 0.63, "learning_rate": 0.0003162372553874085, "loss": 1.9248, "step": 3110 }, { "epoch": 0.63, "learning_rate": 0.0003159316115602036, "loss": 1.9004, "step": 3111 }, { "epoch": 0.63, "learning_rate": 0.00031562604727653187, "loss": 1.96, "step": 3112 }, { "epoch": 0.63, "learning_rate": 0.0003153205626684407, "loss": 1.9609, "step": 3113 }, { "epoch": 0.63, "learning_rate": 0.0003150151578679422, "loss": 1.9326, "step": 3114 }, { "epoch": 0.63, "learning_rate": 0.00031470983300701426, "loss": 2.0518, "step": 3115 }, { "epoch": 0.63, "learning_rate": 0.0003144045882176009, "loss": 1.7979, "step": 3116 }, { "epoch": 0.63, "learning_rate": 0.00031409942363161034, "loss": 1.9482, "step": 3117 }, { "epoch": 0.63, "learning_rate": 0.0003137943393809169, "loss": 1.96, "step": 3118 }, { "epoch": 0.63, "learning_rate": 0.0003134893355973606, "loss": 1.8691, "step": 3119 }, { "epoch": 0.63, "learning_rate": 0.00031318441241274544, "loss": 1.8799, "step": 3120 }, { "epoch": 0.63, "learning_rate": 0.0003128795699588416, "loss": 1.9668, "step": 3121 }, { "epoch": 0.63, "learning_rate": 0.0003125748083673842, "loss": 1.9355, "step": 3122 }, { "epoch": 0.63, "learning_rate": 0.000312270127770073, "loss": 1.9932, "step": 3123 }, { "epoch": 0.63, "learning_rate": 0.00031196552829857317, "loss": 1.9189, "step": 3124 }, { "epoch": 0.63, "learning_rate": 0.0003116610100845145, "loss": 1.9854, "step": 3125 }, { "epoch": 0.63, "learning_rate": 0.0003113565732594923, "loss": 2.0586, "step": 3126 }, { "epoch": 0.63, "learning_rate": 0.0003110522179550658, "loss": 1.8857, "step": 3127 }, { "epoch": 0.63, "learning_rate": 0.00031074794430275963, "loss": 1.9287, "step": 3128 }, { "epoch": 0.64, "learning_rate": 0.00031044375243406317, "loss": 1.9912, "step": 3129 }, { "epoch": 0.64, "learning_rate": 0.0003101396424804301, "loss": 2.0156, "step": 3130 }, { "epoch": 0.64, "learning_rate": 0.00030983561457327856, "loss": 1.8564, "step": 3131 }, { "epoch": 0.64, "learning_rate": 0.000309531668843992, "loss": 1.8896, "step": 3132 }, { "epoch": 0.64, "learning_rate": 0.0003092278054239178, "loss": 1.9521, "step": 3133 }, { "epoch": 0.64, "learning_rate": 0.00030892402444436756, "loss": 1.9082, "step": 3134 }, { "epoch": 0.64, "learning_rate": 0.00030862032603661785, "loss": 1.8584, "step": 3135 }, { "epoch": 0.64, "learning_rate": 0.0003083167103319093, "loss": 1.9912, "step": 3136 }, { "epoch": 0.64, "learning_rate": 0.0003080131774614464, "loss": 1.917, "step": 3137 }, { "epoch": 0.64, "learning_rate": 0.0003077097275563985, "loss": 1.8525, "step": 3138 }, { "epoch": 0.64, "learning_rate": 0.000307406360747899, "loss": 1.9834, "step": 3139 }, { "epoch": 0.64, "learning_rate": 0.00030710307716704476, "loss": 1.9199, "step": 3140 }, { "epoch": 0.64, "learning_rate": 0.00030679987694489725, "loss": 1.8945, "step": 3141 }, { "epoch": 0.64, "learning_rate": 0.00030649676021248185, "loss": 2.04, "step": 3142 }, { "epoch": 0.64, "learning_rate": 0.00030619372710078785, "loss": 2.0303, "step": 3143 }, { "epoch": 0.64, "learning_rate": 0.00030589077774076807, "loss": 1.9375, "step": 3144 }, { "epoch": 0.64, "learning_rate": 0.0003055879122633397, "loss": 1.999, "step": 3145 }, { "epoch": 0.64, "learning_rate": 0.00030528513079938334, "loss": 1.9473, "step": 3146 }, { "epoch": 0.64, "learning_rate": 0.00030498243347974307, "loss": 2.0107, "step": 3147 }, { "epoch": 0.64, "learning_rate": 0.0003046798204352272, "loss": 1.958, "step": 3148 }, { "epoch": 0.64, "learning_rate": 0.00030437729179660727, "loss": 1.9824, "step": 3149 }, { "epoch": 0.64, "learning_rate": 0.00030407484769461805, "loss": 1.9785, "step": 3150 }, { "epoch": 0.64, "learning_rate": 0.0003037724882599585, "loss": 2.0459, "step": 3151 }, { "epoch": 0.64, "learning_rate": 0.0003034702136232903, "loss": 1.8926, "step": 3152 }, { "epoch": 0.64, "learning_rate": 0.0003031680239152388, "loss": 1.9297, "step": 3153 }, { "epoch": 0.64, "learning_rate": 0.00030286591926639286, "loss": 1.9434, "step": 3154 }, { "epoch": 0.64, "learning_rate": 0.0003025638998073041, "loss": 2.0234, "step": 3155 }, { "epoch": 0.64, "learning_rate": 0.0003022619656684875, "loss": 1.9102, "step": 3156 }, { "epoch": 0.64, "learning_rate": 0.0003019601169804216, "loss": 1.8672, "step": 3157 }, { "epoch": 0.64, "learning_rate": 0.00030165835387354744, "loss": 1.9951, "step": 3158 }, { "epoch": 0.64, "learning_rate": 0.0003013566764782692, "loss": 1.9033, "step": 3159 }, { "epoch": 0.64, "learning_rate": 0.00030105508492495406, "loss": 1.9404, "step": 3160 }, { "epoch": 0.64, "learning_rate": 0.00030075357934393254, "loss": 1.9668, "step": 3161 }, { "epoch": 0.64, "learning_rate": 0.00030045215986549724, "loss": 1.874, "step": 3162 }, { "epoch": 0.64, "learning_rate": 0.00030015082661990393, "loss": 2.0645, "step": 3163 }, { "epoch": 0.64, "learning_rate": 0.0002998495797373715, "loss": 1.8867, "step": 3164 }, { "epoch": 0.64, "learning_rate": 0.0002995484193480808, "loss": 1.9775, "step": 3165 }, { "epoch": 0.64, "learning_rate": 0.0002992473455821756, "loss": 1.8818, "step": 3166 }, { "epoch": 0.64, "learning_rate": 0.00029894635856976266, "loss": 1.9102, "step": 3167 }, { "epoch": 0.64, "learning_rate": 0.0002986454584409106, "loss": 2.124, "step": 3168 }, { "epoch": 0.64, "learning_rate": 0.00029834464532565053, "loss": 1.8789, "step": 3169 }, { "epoch": 0.64, "learning_rate": 0.0002980439193539766, "loss": 1.9541, "step": 3170 }, { "epoch": 0.64, "learning_rate": 0.00029774328065584475, "loss": 1.9609, "step": 3171 }, { "epoch": 0.64, "learning_rate": 0.0002974427293611732, "loss": 1.8945, "step": 3172 }, { "epoch": 0.64, "learning_rate": 0.0002971422655998428, "loss": 1.9717, "step": 3173 }, { "epoch": 0.64, "learning_rate": 0.0002968418895016962, "loss": 1.9209, "step": 3174 }, { "epoch": 0.64, "learning_rate": 0.00029654160119653835, "loss": 1.8916, "step": 3175 }, { "epoch": 0.64, "learning_rate": 0.0002962414008141359, "loss": 1.8945, "step": 3176 }, { "epoch": 0.64, "learning_rate": 0.0002959412884842181, "loss": 1.916, "step": 3177 }, { "epoch": 0.65, "learning_rate": 0.00029564126433647586, "loss": 2.0254, "step": 3178 }, { "epoch": 0.65, "learning_rate": 0.00029534132850056173, "loss": 1.9297, "step": 3179 }, { "epoch": 0.65, "learning_rate": 0.00029504148110609063, "loss": 1.916, "step": 3180 }, { "epoch": 0.65, "learning_rate": 0.00029474172228263887, "loss": 2.0625, "step": 3181 }, { "epoch": 0.65, "learning_rate": 0.0002944420521597444, "loss": 1.8877, "step": 3182 }, { "epoch": 0.65, "learning_rate": 0.00029414247086690726, "loss": 1.8398, "step": 3183 }, { "epoch": 0.65, "learning_rate": 0.00029384297853358895, "loss": 1.957, "step": 3184 }, { "epoch": 0.65, "learning_rate": 0.00029354357528921205, "loss": 1.9512, "step": 3185 }, { "epoch": 0.65, "learning_rate": 0.0002932442612631614, "loss": 1.9375, "step": 3186 }, { "epoch": 0.65, "learning_rate": 0.00029294503658478275, "loss": 1.9941, "step": 3187 }, { "epoch": 0.65, "learning_rate": 0.0002926459013833834, "loss": 1.8691, "step": 3188 }, { "epoch": 0.65, "learning_rate": 0.0002923468557882322, "loss": 1.9346, "step": 3189 }, { "epoch": 0.65, "learning_rate": 0.00029204789992855884, "loss": 1.9629, "step": 3190 }, { "epoch": 0.65, "learning_rate": 0.00029174903393355454, "loss": 1.9648, "step": 3191 }, { "epoch": 0.65, "learning_rate": 0.00029145025793237143, "loss": 1.8799, "step": 3192 }, { "epoch": 0.65, "learning_rate": 0.00029115157205412333, "loss": 1.9316, "step": 3193 }, { "epoch": 0.65, "learning_rate": 0.00029085297642788454, "loss": 1.9629, "step": 3194 }, { "epoch": 0.65, "learning_rate": 0.0002905544711826903, "loss": 2.0234, "step": 3195 }, { "epoch": 0.65, "learning_rate": 0.00029025605644753737, "loss": 1.9854, "step": 3196 }, { "epoch": 0.65, "learning_rate": 0.0002899577323513829, "loss": 1.957, "step": 3197 }, { "epoch": 0.65, "learning_rate": 0.00028965949902314475, "loss": 1.9248, "step": 3198 }, { "epoch": 0.65, "learning_rate": 0.00028936135659170215, "loss": 2.04, "step": 3199 }, { "epoch": 0.65, "learning_rate": 0.0002890633051858948, "loss": 2.0146, "step": 3200 }, { "epoch": 0.65, "learning_rate": 0.00028876534493452264, "loss": 1.875, "step": 3201 }, { "epoch": 0.65, "learning_rate": 0.00028846747596634705, "loss": 1.9346, "step": 3202 }, { "epoch": 0.65, "learning_rate": 0.0002881696984100891, "loss": 1.9062, "step": 3203 }, { "epoch": 0.65, "learning_rate": 0.00028787201239443075, "loss": 1.9727, "step": 3204 }, { "epoch": 0.65, "learning_rate": 0.00028757441804801463, "loss": 2.0391, "step": 3205 }, { "epoch": 0.65, "learning_rate": 0.00028727691549944323, "loss": 1.8447, "step": 3206 }, { "epoch": 0.65, "learning_rate": 0.00028697950487728, "loss": 1.9795, "step": 3207 }, { "epoch": 0.65, "learning_rate": 0.00028668218631004786, "loss": 1.9102, "step": 3208 }, { "epoch": 0.65, "learning_rate": 0.00028638495992623094, "loss": 1.9688, "step": 3209 }, { "epoch": 0.65, "learning_rate": 0.00028608782585427275, "loss": 1.9473, "step": 3210 }, { "epoch": 0.65, "learning_rate": 0.00028579078422257686, "loss": 1.9014, "step": 3211 }, { "epoch": 0.65, "learning_rate": 0.0002854938351595078, "loss": 2.0547, "step": 3212 }, { "epoch": 0.65, "learning_rate": 0.0002851969787933889, "loss": 1.9756, "step": 3213 }, { "epoch": 0.65, "learning_rate": 0.00028490021525250434, "loss": 1.9531, "step": 3214 }, { "epoch": 0.65, "learning_rate": 0.0002846035446650981, "loss": 1.9268, "step": 3215 }, { "epoch": 0.65, "learning_rate": 0.0002843069671593734, "loss": 1.9297, "step": 3216 }, { "epoch": 0.65, "learning_rate": 0.0002840104828634935, "loss": 1.9102, "step": 3217 }, { "epoch": 0.65, "learning_rate": 0.00028371409190558196, "loss": 1.9336, "step": 3218 }, { "epoch": 0.65, "learning_rate": 0.00028341779441372095, "loss": 1.9385, "step": 3219 }, { "epoch": 0.65, "learning_rate": 0.00028312159051595314, "loss": 1.9756, "step": 3220 }, { "epoch": 0.65, "learning_rate": 0.0002828254803402807, "loss": 1.8213, "step": 3221 }, { "epoch": 0.65, "learning_rate": 0.00028252946401466464, "loss": 1.9238, "step": 3222 }, { "epoch": 0.65, "learning_rate": 0.0002822335416670257, "loss": 1.916, "step": 3223 }, { "epoch": 0.65, "learning_rate": 0.00028193771342524456, "loss": 2.0801, "step": 3224 }, { "epoch": 0.65, "learning_rate": 0.0002816419794171605, "loss": 1.8779, "step": 3225 }, { "epoch": 0.65, "learning_rate": 0.0002813463397705723, "loss": 1.96, "step": 3226 }, { "epoch": 0.65, "learning_rate": 0.0002810507946132382, "loss": 2.0947, "step": 3227 }, { "epoch": 0.66, "learning_rate": 0.0002807553440728755, "loss": 1.9023, "step": 3228 }, { "epoch": 0.66, "learning_rate": 0.00028045998827716046, "loss": 1.9277, "step": 3229 }, { "epoch": 0.66, "learning_rate": 0.0002801647273537283, "loss": 1.9502, "step": 3230 }, { "epoch": 0.66, "learning_rate": 0.00027986956143017386, "loss": 1.9629, "step": 3231 }, { "epoch": 0.66, "learning_rate": 0.0002795744906340501, "loss": 1.8574, "step": 3232 }, { "epoch": 0.66, "learning_rate": 0.00027927951509286924, "loss": 1.9424, "step": 3233 }, { "epoch": 0.66, "learning_rate": 0.00027898463493410255, "loss": 1.998, "step": 3234 }, { "epoch": 0.66, "learning_rate": 0.0002786898502851801, "loss": 2.0322, "step": 3235 }, { "epoch": 0.66, "learning_rate": 0.00027839516127348997, "loss": 1.9121, "step": 3236 }, { "epoch": 0.66, "learning_rate": 0.00027810056802637993, "loss": 2.0059, "step": 3237 }, { "epoch": 0.66, "learning_rate": 0.00027780607067115563, "loss": 1.9287, "step": 3238 }, { "epoch": 0.66, "learning_rate": 0.0002775116693350813, "loss": 1.9541, "step": 3239 }, { "epoch": 0.66, "learning_rate": 0.00027721736414538037, "loss": 1.9619, "step": 3240 }, { "epoch": 0.66, "learning_rate": 0.0002769231552292336, "loss": 1.957, "step": 3241 }, { "epoch": 0.66, "learning_rate": 0.0002766290427137815, "loss": 1.9268, "step": 3242 }, { "epoch": 0.66, "learning_rate": 0.00027633502672612154, "loss": 1.875, "step": 3243 }, { "epoch": 0.66, "learning_rate": 0.0002760411073933107, "loss": 1.9707, "step": 3244 }, { "epoch": 0.66, "learning_rate": 0.0002757472848423633, "loss": 1.9678, "step": 3245 }, { "epoch": 0.66, "learning_rate": 0.0002754535592002522, "loss": 1.9023, "step": 3246 }, { "epoch": 0.66, "learning_rate": 0.0002751599305939082, "loss": 1.9316, "step": 3247 }, { "epoch": 0.66, "learning_rate": 0.0002748663991502208, "loss": 1.9414, "step": 3248 }, { "epoch": 0.66, "learning_rate": 0.0002745729649960366, "loss": 1.9453, "step": 3249 }, { "epoch": 0.66, "learning_rate": 0.0002742796282581609, "loss": 1.9023, "step": 3250 }, { "epoch": 0.66, "learning_rate": 0.00027398638906335634, "loss": 1.959, "step": 3251 }, { "epoch": 0.66, "learning_rate": 0.0002736932475383436, "loss": 1.8574, "step": 3252 }, { "epoch": 0.66, "learning_rate": 0.0002734002038098015, "loss": 1.9395, "step": 3253 }, { "epoch": 0.66, "learning_rate": 0.000273107258004366, "loss": 1.9141, "step": 3254 }, { "epoch": 0.66, "learning_rate": 0.00027281441024863115, "loss": 1.9932, "step": 3255 }, { "epoch": 0.66, "learning_rate": 0.00027252166066914874, "loss": 1.9688, "step": 3256 }, { "epoch": 0.66, "learning_rate": 0.00027222900939242783, "loss": 1.9014, "step": 3257 }, { "epoch": 0.66, "learning_rate": 0.000271936456544935, "loss": 1.8887, "step": 3258 }, { "epoch": 0.66, "learning_rate": 0.0002716440022530942, "loss": 1.9893, "step": 3259 }, { "epoch": 0.66, "learning_rate": 0.0002713516466432874, "loss": 1.8916, "step": 3260 }, { "epoch": 0.66, "learning_rate": 0.00027105938984185306, "loss": 1.9189, "step": 3261 }, { "epoch": 0.66, "learning_rate": 0.0002707672319750877, "loss": 1.9521, "step": 3262 }, { "epoch": 0.66, "learning_rate": 0.0002704751731692448, "loss": 1.8662, "step": 3263 }, { "epoch": 0.66, "learning_rate": 0.00027018321355053485, "loss": 1.9541, "step": 3264 }, { "epoch": 0.66, "learning_rate": 0.0002698913532451256, "loss": 1.9541, "step": 3265 }, { "epoch": 0.66, "learning_rate": 0.00026959959237914214, "loss": 1.9512, "step": 3266 }, { "epoch": 0.66, "learning_rate": 0.0002693079310786661, "loss": 1.9531, "step": 3267 }, { "epoch": 0.66, "learning_rate": 0.0002690163694697363, "loss": 1.8877, "step": 3268 }, { "epoch": 0.66, "learning_rate": 0.00026872490767834863, "loss": 1.9551, "step": 3269 }, { "epoch": 0.66, "learning_rate": 0.00026843354583045597, "loss": 1.9854, "step": 3270 }, { "epoch": 0.66, "learning_rate": 0.0002681422840519674, "loss": 1.8809, "step": 3271 }, { "epoch": 0.66, "learning_rate": 0.00026785112246874944, "loss": 1.9395, "step": 3272 }, { "epoch": 0.66, "learning_rate": 0.0002675600612066249, "loss": 1.8477, "step": 3273 }, { "epoch": 0.66, "learning_rate": 0.0002672691003913732, "loss": 1.9375, "step": 3274 }, { "epoch": 0.66, "learning_rate": 0.0002669782401487307, "loss": 1.874, "step": 3275 }, { "epoch": 0.66, "learning_rate": 0.0002666874806043899, "loss": 1.8789, "step": 3276 }, { "epoch": 0.67, "learning_rate": 0.00026639682188400016, "loss": 1.9863, "step": 3277 }, { "epoch": 0.67, "learning_rate": 0.00026610626411316683, "loss": 1.9453, "step": 3278 }, { "epoch": 0.67, "learning_rate": 0.00026581580741745226, "loss": 1.8555, "step": 3279 }, { "epoch": 0.67, "learning_rate": 0.0002655254519223746, "loss": 1.8975, "step": 3280 }, { "epoch": 0.67, "learning_rate": 0.0002652351977534082, "loss": 1.8818, "step": 3281 }, { "epoch": 0.67, "learning_rate": 0.00026494504503598394, "loss": 1.9121, "step": 3282 }, { "epoch": 0.67, "learning_rate": 0.0002646549938954891, "loss": 1.9287, "step": 3283 }, { "epoch": 0.67, "learning_rate": 0.0002643650444572663, "loss": 1.8633, "step": 3284 }, { "epoch": 0.67, "learning_rate": 0.00026407519684661497, "loss": 1.9326, "step": 3285 }, { "epoch": 0.67, "learning_rate": 0.00026378545118879005, "loss": 1.9326, "step": 3286 }, { "epoch": 0.67, "learning_rate": 0.00026349580760900233, "loss": 1.9004, "step": 3287 }, { "epoch": 0.67, "learning_rate": 0.0002632062662324192, "loss": 1.9443, "step": 3288 }, { "epoch": 0.67, "learning_rate": 0.000262916827184163, "loss": 1.9492, "step": 3289 }, { "epoch": 0.67, "learning_rate": 0.0002626274905893125, "loss": 1.8955, "step": 3290 }, { "epoch": 0.67, "learning_rate": 0.000262338256572902, "loss": 1.9795, "step": 3291 }, { "epoch": 0.67, "learning_rate": 0.00026204912525992134, "loss": 1.8789, "step": 3292 }, { "epoch": 0.67, "learning_rate": 0.00026176009677531616, "loss": 1.9502, "step": 3293 }, { "epoch": 0.67, "learning_rate": 0.00026147117124398724, "loss": 1.9238, "step": 3294 }, { "epoch": 0.67, "learning_rate": 0.0002611823487907917, "loss": 1.9131, "step": 3295 }, { "epoch": 0.67, "learning_rate": 0.00026089362954054117, "loss": 1.9033, "step": 3296 }, { "epoch": 0.67, "learning_rate": 0.00026060501361800333, "loss": 1.9717, "step": 3297 }, { "epoch": 0.67, "learning_rate": 0.0002603165011479012, "loss": 1.8662, "step": 3298 }, { "epoch": 0.67, "learning_rate": 0.00026002809225491273, "loss": 1.999, "step": 3299 }, { "epoch": 0.67, "learning_rate": 0.00025973978706367106, "loss": 2.0869, "step": 3300 }, { "epoch": 0.67, "learning_rate": 0.00025945158569876526, "loss": 1.9951, "step": 3301 }, { "epoch": 0.67, "learning_rate": 0.00025916348828473857, "loss": 1.8887, "step": 3302 }, { "epoch": 0.67, "learning_rate": 0.00025887549494608975, "loss": 1.9062, "step": 3303 }, { "epoch": 0.67, "learning_rate": 0.00025858760580727274, "loss": 1.9678, "step": 3304 }, { "epoch": 0.67, "learning_rate": 0.0002582998209926965, "loss": 1.9297, "step": 3305 }, { "epoch": 0.67, "learning_rate": 0.00025801214062672425, "loss": 1.9668, "step": 3306 }, { "epoch": 0.67, "learning_rate": 0.000257724564833675, "loss": 1.8633, "step": 3307 }, { "epoch": 0.67, "learning_rate": 0.00025743709373782177, "loss": 1.9277, "step": 3308 }, { "epoch": 0.67, "learning_rate": 0.0002571497274633926, "loss": 1.9492, "step": 3309 }, { "epoch": 0.67, "learning_rate": 0.00025686246613457046, "loss": 1.8955, "step": 3310 }, { "epoch": 0.67, "learning_rate": 0.000256575309875493, "loss": 1.8516, "step": 3311 }, { "epoch": 0.67, "learning_rate": 0.000256288258810252, "loss": 1.9814, "step": 3312 }, { "epoch": 0.67, "learning_rate": 0.00025600131306289397, "loss": 1.9014, "step": 3313 }, { "epoch": 0.67, "learning_rate": 0.0002557144727574203, "loss": 2.0, "step": 3314 }, { "epoch": 0.67, "learning_rate": 0.0002554277380177864, "loss": 1.8975, "step": 3315 }, { "epoch": 0.67, "learning_rate": 0.00025514110896790183, "loss": 1.8975, "step": 3316 }, { "epoch": 0.67, "learning_rate": 0.0002548545857316311, "loss": 1.9199, "step": 3317 }, { "epoch": 0.67, "learning_rate": 0.0002545681684327928, "loss": 1.9678, "step": 3318 }, { "epoch": 0.67, "learning_rate": 0.00025428185719515945, "loss": 1.874, "step": 3319 }, { "epoch": 0.67, "learning_rate": 0.0002539956521424581, "loss": 1.9053, "step": 3320 }, { "epoch": 0.67, "learning_rate": 0.0002537095533983697, "loss": 1.9258, "step": 3321 }, { "epoch": 0.67, "learning_rate": 0.00025342356108652903, "loss": 1.9883, "step": 3322 }, { "epoch": 0.67, "learning_rate": 0.00025313767533052556, "loss": 1.959, "step": 3323 }, { "epoch": 0.67, "learning_rate": 0.00025285189625390194, "loss": 2.0654, "step": 3324 }, { "epoch": 0.67, "learning_rate": 0.0002525662239801553, "loss": 1.9336, "step": 3325 }, { "epoch": 0.68, "learning_rate": 0.0002522806586327364, "loss": 1.916, "step": 3326 }, { "epoch": 0.68, "learning_rate": 0.00025199520033504985, "loss": 1.9766, "step": 3327 }, { "epoch": 0.68, "learning_rate": 0.00025170984921045383, "loss": 1.9043, "step": 3328 }, { "epoch": 0.68, "learning_rate": 0.0002514246053822601, "loss": 1.9932, "step": 3329 }, { "epoch": 0.68, "learning_rate": 0.0002511394689737346, "loss": 1.9307, "step": 3330 }, { "epoch": 0.68, "learning_rate": 0.00025085444010809635, "loss": 1.9707, "step": 3331 }, { "epoch": 0.68, "learning_rate": 0.0002505695189085181, "loss": 1.9053, "step": 3332 }, { "epoch": 0.68, "learning_rate": 0.0002502847054981262, "loss": 1.9365, "step": 3333 }, { "epoch": 0.68, "learning_rate": 0.0002500000000000001, "loss": 1.9053, "step": 3334 }, { "epoch": 0.68, "learning_rate": 0.0002497154025371726, "loss": 1.9062, "step": 3335 }, { "epoch": 0.68, "learning_rate": 0.0002494309132326304, "loss": 1.9678, "step": 3336 }, { "epoch": 0.68, "learning_rate": 0.0002491465322093128, "loss": 1.8613, "step": 3337 }, { "epoch": 0.68, "learning_rate": 0.00024886225959011246, "loss": 1.9395, "step": 3338 }, { "epoch": 0.68, "learning_rate": 0.0002485780954978753, "loss": 1.9258, "step": 3339 }, { "epoch": 0.68, "learning_rate": 0.0002482940400554007, "loss": 1.832, "step": 3340 }, { "epoch": 0.68, "learning_rate": 0.00024801009338544023, "loss": 1.9277, "step": 3341 }, { "epoch": 0.68, "learning_rate": 0.0002477262556106994, "loss": 1.9385, "step": 3342 }, { "epoch": 0.68, "learning_rate": 0.00024744252685383596, "loss": 1.8115, "step": 3343 }, { "epoch": 0.68, "learning_rate": 0.0002471589072374606, "loss": 1.8652, "step": 3344 }, { "epoch": 0.68, "learning_rate": 0.00024687539688413723, "loss": 1.9062, "step": 3345 }, { "epoch": 0.68, "learning_rate": 0.00024659199591638267, "loss": 1.9131, "step": 3346 }, { "epoch": 0.68, "learning_rate": 0.00024630870445666573, "loss": 1.9268, "step": 3347 }, { "epoch": 0.68, "learning_rate": 0.00024602552262740834, "loss": 1.9121, "step": 3348 }, { "epoch": 0.68, "learning_rate": 0.0002457424505509853, "loss": 1.9121, "step": 3349 }, { "epoch": 0.68, "learning_rate": 0.0002454594883497235, "loss": 1.9736, "step": 3350 }, { "epoch": 0.68, "learning_rate": 0.0002451766361459025, "loss": 1.9668, "step": 3351 }, { "epoch": 0.68, "learning_rate": 0.0002448938940617546, "loss": 1.9688, "step": 3352 }, { "epoch": 0.68, "learning_rate": 0.00024461126221946423, "loss": 1.8438, "step": 3353 }, { "epoch": 0.68, "learning_rate": 0.00024432874074116815, "loss": 1.9883, "step": 3354 }, { "epoch": 0.68, "learning_rate": 0.00024404632974895575, "loss": 1.9629, "step": 3355 }, { "epoch": 0.68, "learning_rate": 0.0002437640293648683, "loss": 1.9473, "step": 3356 }, { "epoch": 0.68, "learning_rate": 0.00024348183971089927, "loss": 1.9072, "step": 3357 }, { "epoch": 0.68, "learning_rate": 0.00024319976090899482, "loss": 1.9189, "step": 3358 }, { "epoch": 0.68, "learning_rate": 0.00024291779308105237, "loss": 1.9287, "step": 3359 }, { "epoch": 0.68, "learning_rate": 0.00024263593634892233, "loss": 2.0117, "step": 3360 }, { "epoch": 0.68, "learning_rate": 0.00024235419083440613, "loss": 1.8906, "step": 3361 }, { "epoch": 0.68, "learning_rate": 0.000242072556659258, "loss": 1.8584, "step": 3362 }, { "epoch": 0.68, "learning_rate": 0.00024179103394518349, "loss": 1.9131, "step": 3363 }, { "epoch": 0.68, "learning_rate": 0.00024150962281384005, "loss": 1.9004, "step": 3364 }, { "epoch": 0.68, "learning_rate": 0.00024122832338683726, "loss": 1.9658, "step": 3365 }, { "epoch": 0.68, "learning_rate": 0.0002409471357857359, "loss": 1.8203, "step": 3366 }, { "epoch": 0.68, "learning_rate": 0.0002406660601320489, "loss": 1.8594, "step": 3367 }, { "epoch": 0.68, "learning_rate": 0.00024038509654724077, "loss": 2.0225, "step": 3368 }, { "epoch": 0.68, "learning_rate": 0.00024010424515272732, "loss": 1.9385, "step": 3369 }, { "epoch": 0.68, "learning_rate": 0.00023982350606987585, "loss": 1.9697, "step": 3370 }, { "epoch": 0.68, "learning_rate": 0.00023954287942000552, "loss": 1.9648, "step": 3371 }, { "epoch": 0.68, "learning_rate": 0.00023926236532438645, "loss": 1.9346, "step": 3372 }, { "epoch": 0.68, "learning_rate": 0.00023898196390424042, "loss": 2.0107, "step": 3373 }, { "epoch": 0.68, "learning_rate": 0.00023870167528074066, "loss": 1.8604, "step": 3374 }, { "epoch": 0.69, "learning_rate": 0.0002384214995750112, "loss": 1.9443, "step": 3375 }, { "epoch": 0.69, "learning_rate": 0.0002381414369081274, "loss": 1.958, "step": 3376 }, { "epoch": 0.69, "learning_rate": 0.00023786148740111618, "loss": 1.8877, "step": 3377 }, { "epoch": 0.69, "learning_rate": 0.00023758165117495505, "loss": 1.957, "step": 3378 }, { "epoch": 0.69, "learning_rate": 0.0002373019283505727, "loss": 1.9414, "step": 3379 }, { "epoch": 0.69, "learning_rate": 0.00023702231904884895, "loss": 1.8691, "step": 3380 }, { "epoch": 0.69, "learning_rate": 0.00023674282339061477, "loss": 2.0078, "step": 3381 }, { "epoch": 0.69, "learning_rate": 0.0002364634414966515, "loss": 1.9258, "step": 3382 }, { "epoch": 0.69, "learning_rate": 0.00023618417348769138, "loss": 1.9043, "step": 3383 }, { "epoch": 0.69, "learning_rate": 0.00023590501948441805, "loss": 1.8379, "step": 3384 }, { "epoch": 0.69, "learning_rate": 0.00023562597960746524, "loss": 1.9941, "step": 3385 }, { "epoch": 0.69, "learning_rate": 0.00023534705397741735, "loss": 1.8428, "step": 3386 }, { "epoch": 0.69, "learning_rate": 0.00023506824271480987, "loss": 1.8096, "step": 3387 }, { "epoch": 0.69, "learning_rate": 0.0002347895459401288, "loss": 1.9336, "step": 3388 }, { "epoch": 0.69, "learning_rate": 0.00023451096377381014, "loss": 1.8682, "step": 3389 }, { "epoch": 0.69, "learning_rate": 0.00023423249633624106, "loss": 2.0273, "step": 3390 }, { "epoch": 0.69, "learning_rate": 0.00023395414374775865, "loss": 1.9238, "step": 3391 }, { "epoch": 0.69, "learning_rate": 0.00023367590612865036, "loss": 1.8975, "step": 3392 }, { "epoch": 0.69, "learning_rate": 0.0002333977835991545, "loss": 1.9121, "step": 3393 }, { "epoch": 0.69, "learning_rate": 0.00023311977627945885, "loss": 1.9033, "step": 3394 }, { "epoch": 0.69, "learning_rate": 0.00023284188428970222, "loss": 1.8281, "step": 3395 }, { "epoch": 0.69, "learning_rate": 0.00023256410774997284, "loss": 1.8721, "step": 3396 }, { "epoch": 0.69, "learning_rate": 0.0002322864467803097, "loss": 1.9453, "step": 3397 }, { "epoch": 0.69, "learning_rate": 0.00023200890150070147, "loss": 1.8477, "step": 3398 }, { "epoch": 0.69, "learning_rate": 0.00023173147203108663, "loss": 1.96, "step": 3399 }, { "epoch": 0.69, "learning_rate": 0.00023145415849135426, "loss": 1.8496, "step": 3400 }, { "epoch": 0.69, "learning_rate": 0.00023117696100134261, "loss": 1.9766, "step": 3401 }, { "epoch": 0.69, "learning_rate": 0.00023089987968084036, "loss": 1.916, "step": 3402 }, { "epoch": 0.69, "learning_rate": 0.00023062291464958585, "loss": 1.9727, "step": 3403 }, { "epoch": 0.69, "learning_rate": 0.000230346066027267, "loss": 1.9355, "step": 3404 }, { "epoch": 0.69, "learning_rate": 0.0002300693339335212, "loss": 1.8652, "step": 3405 }, { "epoch": 0.69, "learning_rate": 0.0002297927184879363, "loss": 1.8809, "step": 3406 }, { "epoch": 0.69, "learning_rate": 0.00022951621981004872, "loss": 1.9336, "step": 3407 }, { "epoch": 0.69, "learning_rate": 0.00022923983801934522, "loss": 1.9316, "step": 3408 }, { "epoch": 0.69, "learning_rate": 0.0002289635732352619, "loss": 1.9609, "step": 3409 }, { "epoch": 0.69, "learning_rate": 0.00022868742557718402, "loss": 1.9111, "step": 3410 }, { "epoch": 0.69, "learning_rate": 0.00022841139516444626, "loss": 1.9639, "step": 3411 }, { "epoch": 0.69, "learning_rate": 0.0002281354821163326, "loss": 1.8564, "step": 3412 }, { "epoch": 0.69, "learning_rate": 0.0002278596865520768, "loss": 1.8984, "step": 3413 }, { "epoch": 0.69, "learning_rate": 0.00022758400859086114, "loss": 2.0312, "step": 3414 }, { "epoch": 0.69, "learning_rate": 0.00022730844835181757, "loss": 1.8945, "step": 3415 }, { "epoch": 0.69, "learning_rate": 0.0002270330059540272, "loss": 1.9619, "step": 3416 }, { "epoch": 0.69, "learning_rate": 0.00022675768151651993, "loss": 1.9502, "step": 3417 }, { "epoch": 0.69, "learning_rate": 0.00022648247515827453, "loss": 1.9072, "step": 3418 }, { "epoch": 0.69, "learning_rate": 0.00022620738699821936, "loss": 2.0205, "step": 3419 }, { "epoch": 0.69, "learning_rate": 0.0002259324171552312, "loss": 1.9053, "step": 3420 }, { "epoch": 0.69, "learning_rate": 0.00022565756574813574, "loss": 1.8613, "step": 3421 }, { "epoch": 0.69, "learning_rate": 0.0002253828328957076, "loss": 1.9746, "step": 3422 }, { "epoch": 0.69, "learning_rate": 0.00022510821871667047, "loss": 1.9854, "step": 3423 }, { "epoch": 0.69, "learning_rate": 0.0002248337233296961, "loss": 1.8799, "step": 3424 }, { "epoch": 0.7, "learning_rate": 0.00022455934685340558, "loss": 1.8652, "step": 3425 }, { "epoch": 0.7, "learning_rate": 0.00022428508940636804, "loss": 1.8857, "step": 3426 }, { "epoch": 0.7, "learning_rate": 0.00022401095110710135, "loss": 1.8896, "step": 3427 }, { "epoch": 0.7, "learning_rate": 0.00022373693207407226, "loss": 1.9121, "step": 3428 }, { "epoch": 0.7, "learning_rate": 0.00022346303242569537, "loss": 2.0449, "step": 3429 }, { "epoch": 0.7, "learning_rate": 0.0002231892522803343, "loss": 1.9258, "step": 3430 }, { "epoch": 0.7, "learning_rate": 0.00022291559175630044, "loss": 1.8936, "step": 3431 }, { "epoch": 0.7, "learning_rate": 0.00022264205097185407, "loss": 1.9395, "step": 3432 }, { "epoch": 0.7, "learning_rate": 0.00022236863004520323, "loss": 1.9082, "step": 3433 }, { "epoch": 0.7, "learning_rate": 0.0002220953290945043, "loss": 1.9131, "step": 3434 }, { "epoch": 0.7, "learning_rate": 0.000221822148237862, "loss": 1.9844, "step": 3435 }, { "epoch": 0.7, "learning_rate": 0.0002215490875933292, "loss": 1.8896, "step": 3436 }, { "epoch": 0.7, "learning_rate": 0.00022127614727890626, "loss": 1.8848, "step": 3437 }, { "epoch": 0.7, "learning_rate": 0.00022100332741254236, "loss": 1.9775, "step": 3438 }, { "epoch": 0.7, "learning_rate": 0.000220730628112134, "loss": 1.9473, "step": 3439 }, { "epoch": 0.7, "learning_rate": 0.00022045804949552555, "loss": 1.8906, "step": 3440 }, { "epoch": 0.7, "learning_rate": 0.00022018559168050995, "loss": 1.9248, "step": 3441 }, { "epoch": 0.7, "learning_rate": 0.00021991325478482694, "loss": 1.9004, "step": 3442 }, { "epoch": 0.7, "learning_rate": 0.00021964103892616477, "loss": 1.835, "step": 3443 }, { "epoch": 0.7, "learning_rate": 0.0002193689442221593, "loss": 1.9688, "step": 3444 }, { "epoch": 0.7, "learning_rate": 0.00021909697079039363, "loss": 1.8291, "step": 3445 }, { "epoch": 0.7, "learning_rate": 0.00021882511874839878, "loss": 1.9551, "step": 3446 }, { "epoch": 0.7, "learning_rate": 0.000218553388213653, "loss": 1.8857, "step": 3447 }, { "epoch": 0.7, "learning_rate": 0.00021828177930358251, "loss": 1.9297, "step": 3448 }, { "epoch": 0.7, "learning_rate": 0.00021801029213556052, "loss": 1.9639, "step": 3449 }, { "epoch": 0.7, "learning_rate": 0.00021773892682690782, "loss": 1.957, "step": 3450 }, { "epoch": 0.7, "learning_rate": 0.00021746768349489281, "loss": 1.9707, "step": 3451 }, { "epoch": 0.7, "learning_rate": 0.0002171965622567308, "loss": 1.8408, "step": 3452 }, { "epoch": 0.7, "learning_rate": 0.00021692556322958412, "loss": 1.8887, "step": 3453 }, { "epoch": 0.7, "learning_rate": 0.00021665468653056307, "loss": 1.8164, "step": 3454 }, { "epoch": 0.7, "learning_rate": 0.00021638393227672444, "loss": 1.8701, "step": 3455 }, { "epoch": 0.7, "learning_rate": 0.00021611330058507206, "loss": 1.8818, "step": 3456 }, { "epoch": 0.7, "learning_rate": 0.0002158427915725573, "loss": 1.8828, "step": 3457 }, { "epoch": 0.7, "learning_rate": 0.00021557240535607835, "loss": 2.0039, "step": 3458 }, { "epoch": 0.7, "learning_rate": 0.00021530214205247995, "loss": 1.9863, "step": 3459 }, { "epoch": 0.7, "learning_rate": 0.00021503200177855426, "loss": 1.8877, "step": 3460 }, { "epoch": 0.7, "learning_rate": 0.0002147619846510399, "loss": 1.8652, "step": 3461 }, { "epoch": 0.7, "learning_rate": 0.00021449209078662224, "loss": 1.8467, "step": 3462 }, { "epoch": 0.7, "learning_rate": 0.00021422232030193374, "loss": 1.8848, "step": 3463 }, { "epoch": 0.7, "learning_rate": 0.0002139526733135534, "loss": 1.9561, "step": 3464 }, { "epoch": 0.7, "learning_rate": 0.00021368314993800675, "loss": 1.7588, "step": 3465 }, { "epoch": 0.7, "learning_rate": 0.00021341375029176562, "loss": 1.9668, "step": 3466 }, { "epoch": 0.7, "learning_rate": 0.00021314447449124918, "loss": 1.8691, "step": 3467 }, { "epoch": 0.7, "learning_rate": 0.00021287532265282239, "loss": 1.8525, "step": 3468 }, { "epoch": 0.7, "learning_rate": 0.0002126062948927966, "loss": 1.9512, "step": 3469 }, { "epoch": 0.7, "learning_rate": 0.00021233739132743007, "loss": 1.9561, "step": 3470 }, { "epoch": 0.7, "learning_rate": 0.00021206861207292716, "loss": 1.8857, "step": 3471 }, { "epoch": 0.7, "learning_rate": 0.00021179995724543823, "loss": 1.915, "step": 3472 }, { "epoch": 0.7, "learning_rate": 0.00021153142696106036, "loss": 1.876, "step": 3473 }, { "epoch": 0.71, "learning_rate": 0.00021126302133583647, "loss": 1.877, "step": 3474 }, { "epoch": 0.71, "learning_rate": 0.0002109947404857554, "loss": 1.9785, "step": 3475 }, { "epoch": 0.71, "learning_rate": 0.00021072658452675274, "loss": 1.793, "step": 3476 }, { "epoch": 0.71, "learning_rate": 0.00021045855357470944, "loss": 1.9277, "step": 3477 }, { "epoch": 0.71, "learning_rate": 0.00021019064774545283, "loss": 1.9092, "step": 3478 }, { "epoch": 0.71, "learning_rate": 0.00020992286715475623, "loss": 1.9795, "step": 3479 }, { "epoch": 0.71, "learning_rate": 0.00020965521191833848, "loss": 1.877, "step": 3480 }, { "epoch": 0.71, "learning_rate": 0.0002093876821518645, "loss": 1.8721, "step": 3481 }, { "epoch": 0.71, "learning_rate": 0.0002091202779709447, "loss": 1.9912, "step": 3482 }, { "epoch": 0.71, "learning_rate": 0.00020885299949113578, "loss": 1.8994, "step": 3483 }, { "epoch": 0.71, "learning_rate": 0.00020858584682793946, "loss": 1.9248, "step": 3484 }, { "epoch": 0.71, "learning_rate": 0.0002083188200968036, "loss": 1.959, "step": 3485 }, { "epoch": 0.71, "learning_rate": 0.00020805191941312157, "loss": 1.8428, "step": 3486 }, { "epoch": 0.71, "learning_rate": 0.00020778514489223205, "loss": 1.9062, "step": 3487 }, { "epoch": 0.71, "learning_rate": 0.000207518496649419, "loss": 1.9434, "step": 3488 }, { "epoch": 0.71, "learning_rate": 0.00020725197479991265, "loss": 1.9385, "step": 3489 }, { "epoch": 0.71, "learning_rate": 0.00020698557945888774, "loss": 1.9229, "step": 3490 }, { "epoch": 0.71, "learning_rate": 0.0002067193107414645, "loss": 1.835, "step": 3491 }, { "epoch": 0.71, "learning_rate": 0.00020645316876270886, "loss": 1.9746, "step": 3492 }, { "epoch": 0.71, "learning_rate": 0.0002061871536376319, "loss": 1.9258, "step": 3493 }, { "epoch": 0.71, "learning_rate": 0.00020592126548118933, "loss": 1.958, "step": 3494 }, { "epoch": 0.71, "learning_rate": 0.00020565550440828278, "loss": 1.9453, "step": 3495 }, { "epoch": 0.71, "learning_rate": 0.0002053898705337583, "loss": 1.8408, "step": 3496 }, { "epoch": 0.71, "learning_rate": 0.0002051243639724071, "loss": 1.8701, "step": 3497 }, { "epoch": 0.71, "learning_rate": 0.0002048589848389657, "loss": 1.8574, "step": 3498 }, { "epoch": 0.71, "learning_rate": 0.00020459373324811548, "loss": 1.9717, "step": 3499 }, { "epoch": 0.71, "learning_rate": 0.00020432860931448243, "loss": 1.9238, "step": 3500 }, { "epoch": 0.71, "learning_rate": 0.00020406361315263732, "loss": 1.8936, "step": 3501 }, { "epoch": 0.71, "learning_rate": 0.00020379874487709633, "loss": 1.9053, "step": 3502 }, { "epoch": 0.71, "learning_rate": 0.00020353400460231975, "loss": 1.9541, "step": 3503 }, { "epoch": 0.71, "learning_rate": 0.00020326939244271253, "loss": 1.9238, "step": 3504 }, { "epoch": 0.71, "learning_rate": 0.0002030049085126247, "loss": 1.9219, "step": 3505 }, { "epoch": 0.71, "learning_rate": 0.00020274055292635086, "loss": 1.9238, "step": 3506 }, { "epoch": 0.71, "learning_rate": 0.00020247632579812957, "loss": 1.8965, "step": 3507 }, { "epoch": 0.71, "learning_rate": 0.0002022122272421446, "loss": 1.9043, "step": 3508 }, { "epoch": 0.71, "learning_rate": 0.00020194825737252366, "loss": 1.873, "step": 3509 }, { "epoch": 0.71, "learning_rate": 0.00020168441630333877, "loss": 1.9014, "step": 3510 }, { "epoch": 0.71, "learning_rate": 0.00020142070414860702, "loss": 1.9707, "step": 3511 }, { "epoch": 0.71, "learning_rate": 0.00020115712102228877, "loss": 1.9365, "step": 3512 }, { "epoch": 0.71, "learning_rate": 0.00020089366703828965, "loss": 1.8818, "step": 3513 }, { "epoch": 0.71, "learning_rate": 0.00020063034231045852, "loss": 1.9316, "step": 3514 }, { "epoch": 0.71, "learning_rate": 0.00020036714695258922, "loss": 2.0107, "step": 3515 }, { "epoch": 0.71, "learning_rate": 0.00020010408107841915, "loss": 1.8604, "step": 3516 }, { "epoch": 0.71, "learning_rate": 0.00019984114480162973, "loss": 1.9785, "step": 3517 }, { "epoch": 0.71, "learning_rate": 0.00019957833823584692, "loss": 1.8213, "step": 3518 }, { "epoch": 0.71, "learning_rate": 0.00019931566149463992, "loss": 1.8486, "step": 3519 }, { "epoch": 0.71, "learning_rate": 0.00019905311469152226, "loss": 1.9648, "step": 3520 }, { "epoch": 0.71, "learning_rate": 0.00019879069793995153, "loss": 1.8408, "step": 3521 }, { "epoch": 0.71, "learning_rate": 0.00019852841135332855, "loss": 1.8604, "step": 3522 }, { "epoch": 0.72, "learning_rate": 0.00019826625504499806, "loss": 1.9932, "step": 3523 }, { "epoch": 0.72, "learning_rate": 0.0001980042291282489, "loss": 1.8242, "step": 3524 }, { "epoch": 0.72, "learning_rate": 0.0001977423337163129, "loss": 1.9434, "step": 3525 }, { "epoch": 0.72, "learning_rate": 0.00019748056892236605, "loss": 1.915, "step": 3526 }, { "epoch": 0.72, "learning_rate": 0.00019721893485952785, "loss": 1.8662, "step": 3527 }, { "epoch": 0.72, "learning_rate": 0.00019695743164086104, "loss": 1.9355, "step": 3528 }, { "epoch": 0.72, "learning_rate": 0.00019669605937937192, "loss": 1.8926, "step": 3529 }, { "epoch": 0.72, "learning_rate": 0.00019643481818801002, "loss": 1.8916, "step": 3530 }, { "epoch": 0.72, "learning_rate": 0.00019617370817966872, "loss": 2.0068, "step": 3531 }, { "epoch": 0.72, "learning_rate": 0.00019591272946718413, "loss": 1.9365, "step": 3532 }, { "epoch": 0.72, "learning_rate": 0.00019565188216333611, "loss": 1.9648, "step": 3533 }, { "epoch": 0.72, "learning_rate": 0.00019539116638084758, "loss": 1.9678, "step": 3534 }, { "epoch": 0.72, "learning_rate": 0.0001951305822323845, "loss": 1.8018, "step": 3535 }, { "epoch": 0.72, "learning_rate": 0.00019487012983055585, "loss": 2.0508, "step": 3536 }, { "epoch": 0.72, "learning_rate": 0.00019460980928791405, "loss": 1.9414, "step": 3537 }, { "epoch": 0.72, "learning_rate": 0.00019434962071695434, "loss": 1.9639, "step": 3538 }, { "epoch": 0.72, "learning_rate": 0.00019408956423011458, "loss": 1.7695, "step": 3539 }, { "epoch": 0.72, "learning_rate": 0.0001938296399397762, "loss": 1.9668, "step": 3540 }, { "epoch": 0.72, "learning_rate": 0.00019356984795826333, "loss": 1.9014, "step": 3541 }, { "epoch": 0.72, "learning_rate": 0.00019331018839784236, "loss": 1.9873, "step": 3542 }, { "epoch": 0.72, "learning_rate": 0.0001930506613707233, "loss": 1.8232, "step": 3543 }, { "epoch": 0.72, "learning_rate": 0.00019279126698905835, "loss": 1.8818, "step": 3544 }, { "epoch": 0.72, "learning_rate": 0.0001925320053649422, "loss": 1.8994, "step": 3545 }, { "epoch": 0.72, "learning_rate": 0.00019227287661041293, "loss": 1.8438, "step": 3546 }, { "epoch": 0.72, "learning_rate": 0.00019201388083745036, "loss": 1.8203, "step": 3547 }, { "epoch": 0.72, "learning_rate": 0.00019175501815797759, "loss": 1.8848, "step": 3548 }, { "epoch": 0.72, "learning_rate": 0.0001914962886838595, "loss": 1.8564, "step": 3549 }, { "epoch": 0.72, "learning_rate": 0.0001912376925269041, "loss": 1.9277, "step": 3550 }, { "epoch": 0.72, "learning_rate": 0.00019097922979886128, "loss": 1.9355, "step": 3551 }, { "epoch": 0.72, "learning_rate": 0.0001907209006114232, "loss": 1.874, "step": 3552 }, { "epoch": 0.72, "learning_rate": 0.00019046270507622505, "loss": 2.0264, "step": 3553 }, { "epoch": 0.72, "learning_rate": 0.00019020464330484323, "loss": 1.918, "step": 3554 }, { "epoch": 0.72, "learning_rate": 0.00018994671540879704, "loss": 1.874, "step": 3555 }, { "epoch": 0.72, "learning_rate": 0.00018968892149954809, "loss": 1.9355, "step": 3556 }, { "epoch": 0.72, "learning_rate": 0.00018943126168849945, "loss": 1.8711, "step": 3557 }, { "epoch": 0.72, "learning_rate": 0.00018917373608699634, "loss": 1.9404, "step": 3558 }, { "epoch": 0.72, "learning_rate": 0.00018891634480632663, "loss": 1.9219, "step": 3559 }, { "epoch": 0.72, "learning_rate": 0.00018865908795771924, "loss": 1.958, "step": 3560 }, { "epoch": 0.72, "learning_rate": 0.00018840196565234575, "loss": 2.0029, "step": 3561 }, { "epoch": 0.72, "learning_rate": 0.00018814497800131937, "loss": 2.0322, "step": 3562 }, { "epoch": 0.72, "learning_rate": 0.00018788812511569492, "loss": 1.9082, "step": 3563 }, { "epoch": 0.72, "learning_rate": 0.0001876314071064691, "loss": 1.8994, "step": 3564 }, { "epoch": 0.72, "learning_rate": 0.00018737482408458012, "loss": 2.0215, "step": 3565 }, { "epoch": 0.72, "learning_rate": 0.00018711837616090855, "loss": 2.041, "step": 3566 }, { "epoch": 0.72, "learning_rate": 0.0001868620634462756, "loss": 1.9834, "step": 3567 }, { "epoch": 0.72, "learning_rate": 0.00018660588605144484, "loss": 1.8223, "step": 3568 }, { "epoch": 0.72, "learning_rate": 0.00018634984408712118, "loss": 1.916, "step": 3569 }, { "epoch": 0.72, "learning_rate": 0.00018609393766395082, "loss": 1.9482, "step": 3570 }, { "epoch": 0.72, "learning_rate": 0.0001858381668925212, "loss": 1.8359, "step": 3571 }, { "epoch": 0.72, "learning_rate": 0.00018558253188336176, "loss": 1.916, "step": 3572 }, { "epoch": 0.73, "learning_rate": 0.0001853270327469428, "loss": 1.9629, "step": 3573 }, { "epoch": 0.73, "learning_rate": 0.00018507166959367578, "loss": 1.9688, "step": 3574 }, { "epoch": 0.73, "learning_rate": 0.0001848164425339139, "loss": 1.9229, "step": 3575 }, { "epoch": 0.73, "learning_rate": 0.00018456135167795135, "loss": 1.9492, "step": 3576 }, { "epoch": 0.73, "learning_rate": 0.00018430639713602314, "loss": 1.9844, "step": 3577 }, { "epoch": 0.73, "learning_rate": 0.00018405157901830595, "loss": 1.8994, "step": 3578 }, { "epoch": 0.73, "learning_rate": 0.000183796897434917, "loss": 1.8691, "step": 3579 }, { "epoch": 0.73, "learning_rate": 0.00018354235249591473, "loss": 1.876, "step": 3580 }, { "epoch": 0.73, "learning_rate": 0.00018328794431129826, "loss": 1.9092, "step": 3581 }, { "epoch": 0.73, "learning_rate": 0.0001830336729910081, "loss": 1.8662, "step": 3582 }, { "epoch": 0.73, "learning_rate": 0.00018277953864492548, "loss": 1.877, "step": 3583 }, { "epoch": 0.73, "learning_rate": 0.00018252554138287191, "loss": 1.9521, "step": 3584 }, { "epoch": 0.73, "learning_rate": 0.0001822716813146106, "loss": 1.7842, "step": 3585 }, { "epoch": 0.73, "learning_rate": 0.0001820179585498446, "loss": 1.9863, "step": 3586 }, { "epoch": 0.73, "learning_rate": 0.00018176437319821786, "loss": 1.874, "step": 3587 }, { "epoch": 0.73, "learning_rate": 0.00018151092536931523, "loss": 1.7959, "step": 3588 }, { "epoch": 0.73, "learning_rate": 0.0001812576151726621, "loss": 1.9062, "step": 3589 }, { "epoch": 0.73, "learning_rate": 0.000181004442717724, "loss": 1.8896, "step": 3590 }, { "epoch": 0.73, "learning_rate": 0.00018075140811390738, "loss": 1.8467, "step": 3591 }, { "epoch": 0.73, "learning_rate": 0.0001804985114705588, "loss": 2.0049, "step": 3592 }, { "epoch": 0.73, "learning_rate": 0.0001802457528969652, "loss": 1.835, "step": 3593 }, { "epoch": 0.73, "learning_rate": 0.0001799931325023542, "loss": 1.9424, "step": 3594 }, { "epoch": 0.73, "learning_rate": 0.00017974065039589332, "loss": 1.8838, "step": 3595 }, { "epoch": 0.73, "learning_rate": 0.00017948830668669047, "loss": 1.7344, "step": 3596 }, { "epoch": 0.73, "learning_rate": 0.000179236101483794, "loss": 1.9717, "step": 3597 }, { "epoch": 0.73, "learning_rate": 0.00017898403489619198, "loss": 1.8896, "step": 3598 }, { "epoch": 0.73, "learning_rate": 0.00017873210703281284, "loss": 1.9502, "step": 3599 }, { "epoch": 0.73, "learning_rate": 0.0001784803180025247, "loss": 1.873, "step": 3600 }, { "epoch": 0.73, "learning_rate": 0.00017822866791413638, "loss": 1.9121, "step": 3601 }, { "epoch": 0.73, "learning_rate": 0.00017797715687639594, "loss": 1.8994, "step": 3602 }, { "epoch": 0.73, "learning_rate": 0.0001777257849979918, "loss": 1.916, "step": 3603 }, { "epoch": 0.73, "learning_rate": 0.00017747455238755223, "loss": 1.8867, "step": 3604 }, { "epoch": 0.73, "learning_rate": 0.00017722345915364507, "loss": 1.9668, "step": 3605 }, { "epoch": 0.73, "learning_rate": 0.00017697250540477788, "loss": 1.9365, "step": 3606 }, { "epoch": 0.73, "learning_rate": 0.0001767216912493984, "loss": 1.8994, "step": 3607 }, { "epoch": 0.73, "learning_rate": 0.00017647101679589373, "loss": 1.8525, "step": 3608 }, { "epoch": 0.73, "learning_rate": 0.00017622048215259028, "loss": 1.8984, "step": 3609 }, { "epoch": 0.73, "learning_rate": 0.0001759700874277546, "loss": 1.8809, "step": 3610 }, { "epoch": 0.73, "learning_rate": 0.00017571983272959284, "loss": 1.8457, "step": 3611 }, { "epoch": 0.73, "learning_rate": 0.00017546971816624995, "loss": 1.9189, "step": 3612 }, { "epoch": 0.73, "learning_rate": 0.00017521974384581112, "loss": 1.9316, "step": 3613 }, { "epoch": 0.73, "learning_rate": 0.0001749699098763003, "loss": 1.9951, "step": 3614 }, { "epoch": 0.73, "learning_rate": 0.00017472021636568108, "loss": 1.9102, "step": 3615 }, { "epoch": 0.73, "learning_rate": 0.0001744706634218562, "loss": 1.9346, "step": 3616 }, { "epoch": 0.73, "learning_rate": 0.00017422125115266785, "loss": 1.7627, "step": 3617 }, { "epoch": 0.73, "learning_rate": 0.00017397197966589756, "loss": 1.9785, "step": 3618 }, { "epoch": 0.73, "learning_rate": 0.00017372284906926543, "loss": 1.8164, "step": 3619 }, { "epoch": 0.73, "learning_rate": 0.00017347385947043143, "loss": 1.8447, "step": 3620 }, { "epoch": 0.73, "learning_rate": 0.0001732250109769941, "loss": 1.8643, "step": 3621 }, { "epoch": 0.74, "learning_rate": 0.00017297630369649087, "loss": 1.9375, "step": 3622 }, { "epoch": 0.74, "learning_rate": 0.0001727277377363986, "loss": 1.8643, "step": 3623 }, { "epoch": 0.74, "learning_rate": 0.00017247931320413308, "loss": 1.9355, "step": 3624 }, { "epoch": 0.74, "learning_rate": 0.00017223103020704838, "loss": 1.9668, "step": 3625 }, { "epoch": 0.74, "learning_rate": 0.00017198288885243818, "loss": 1.8525, "step": 3626 }, { "epoch": 0.74, "learning_rate": 0.0001717348892475345, "loss": 1.8545, "step": 3627 }, { "epoch": 0.74, "learning_rate": 0.00017148703149950784, "loss": 1.8525, "step": 3628 }, { "epoch": 0.74, "learning_rate": 0.00017123931571546826, "loss": 1.8545, "step": 3629 }, { "epoch": 0.74, "learning_rate": 0.0001709917420024635, "loss": 1.9131, "step": 3630 }, { "epoch": 0.74, "learning_rate": 0.00017074431046748073, "loss": 1.9521, "step": 3631 }, { "epoch": 0.74, "learning_rate": 0.000170497021217445, "loss": 1.9072, "step": 3632 }, { "epoch": 0.74, "learning_rate": 0.00017024987435922053, "loss": 1.8945, "step": 3633 }, { "epoch": 0.74, "learning_rate": 0.0001700028699996094, "loss": 1.8887, "step": 3634 }, { "epoch": 0.74, "learning_rate": 0.00016975600824535226, "loss": 1.9229, "step": 3635 }, { "epoch": 0.74, "learning_rate": 0.00016950928920312857, "loss": 1.8027, "step": 3636 }, { "epoch": 0.74, "learning_rate": 0.0001692627129795555, "loss": 1.8887, "step": 3637 }, { "epoch": 0.74, "learning_rate": 0.00016901627968118888, "loss": 1.8662, "step": 3638 }, { "epoch": 0.74, "learning_rate": 0.00016876998941452292, "loss": 1.9814, "step": 3639 }, { "epoch": 0.74, "learning_rate": 0.00016852384228598967, "loss": 1.9238, "step": 3640 }, { "epoch": 0.74, "learning_rate": 0.00016827783840195915, "loss": 1.8379, "step": 3641 }, { "epoch": 0.74, "learning_rate": 0.00016803197786874013, "loss": 1.9619, "step": 3642 }, { "epoch": 0.74, "learning_rate": 0.00016778626079257903, "loss": 1.8809, "step": 3643 }, { "epoch": 0.74, "learning_rate": 0.00016754068727966003, "loss": 1.8154, "step": 3644 }, { "epoch": 0.74, "learning_rate": 0.00016729525743610575, "loss": 1.9131, "step": 3645 }, { "epoch": 0.74, "learning_rate": 0.00016704997136797673, "loss": 1.9258, "step": 3646 }, { "epoch": 0.74, "learning_rate": 0.00016680482918127087, "loss": 1.8535, "step": 3647 }, { "epoch": 0.74, "learning_rate": 0.0001665598309819245, "loss": 1.958, "step": 3648 }, { "epoch": 0.74, "learning_rate": 0.00016631497687581126, "loss": 1.8252, "step": 3649 }, { "epoch": 0.74, "learning_rate": 0.00016607026696874262, "loss": 1.8262, "step": 3650 }, { "epoch": 0.74, "learning_rate": 0.00016582570136646784, "loss": 1.8926, "step": 3651 }, { "epoch": 0.74, "learning_rate": 0.00016558128017467406, "loss": 1.748, "step": 3652 }, { "epoch": 0.74, "learning_rate": 0.00016533700349898552, "loss": 1.8984, "step": 3653 }, { "epoch": 0.74, "learning_rate": 0.0001650928714449641, "loss": 1.874, "step": 3654 }, { "epoch": 0.74, "learning_rate": 0.00016484888411810966, "loss": 1.8672, "step": 3655 }, { "epoch": 0.74, "learning_rate": 0.00016460504162385902, "loss": 2.0068, "step": 3656 }, { "epoch": 0.74, "learning_rate": 0.00016436134406758635, "loss": 1.9111, "step": 3657 }, { "epoch": 0.74, "learning_rate": 0.0001641177915546036, "loss": 1.9453, "step": 3658 }, { "epoch": 0.74, "learning_rate": 0.00016387438419016, "loss": 1.9736, "step": 3659 }, { "epoch": 0.74, "learning_rate": 0.00016363112207944152, "loss": 1.9795, "step": 3660 }, { "epoch": 0.74, "learning_rate": 0.0001633880053275722, "loss": 1.998, "step": 3661 }, { "epoch": 0.74, "learning_rate": 0.00016314503403961256, "loss": 1.8604, "step": 3662 }, { "epoch": 0.74, "learning_rate": 0.0001629022083205603, "loss": 1.916, "step": 3663 }, { "epoch": 0.74, "learning_rate": 0.0001626595282753508, "loss": 1.8154, "step": 3664 }, { "epoch": 0.74, "learning_rate": 0.00016241699400885575, "loss": 1.959, "step": 3665 }, { "epoch": 0.74, "learning_rate": 0.00016217460562588455, "loss": 1.9307, "step": 3666 }, { "epoch": 0.74, "learning_rate": 0.0001619323632311828, "loss": 1.8711, "step": 3667 }, { "epoch": 0.74, "learning_rate": 0.00016169026692943388, "loss": 1.7812, "step": 3668 }, { "epoch": 0.74, "learning_rate": 0.00016144831682525734, "loss": 1.9678, "step": 3669 }, { "epoch": 0.74, "learning_rate": 0.0001612065130232096, "loss": 1.9834, "step": 3670 }, { "epoch": 0.75, "learning_rate": 0.00016096485562778446, "loss": 1.9248, "step": 3671 }, { "epoch": 0.75, "learning_rate": 0.00016072334474341154, "loss": 1.9336, "step": 3672 }, { "epoch": 0.75, "learning_rate": 0.000160481980474458, "loss": 1.9385, "step": 3673 }, { "epoch": 0.75, "learning_rate": 0.00016024076292522733, "loss": 1.96, "step": 3674 }, { "epoch": 0.75, "learning_rate": 0.0001599996921999594, "loss": 1.8779, "step": 3675 }, { "epoch": 0.75, "learning_rate": 0.0001597587684028307, "loss": 1.8896, "step": 3676 }, { "epoch": 0.75, "learning_rate": 0.0001595179916379546, "loss": 1.8584, "step": 3677 }, { "epoch": 0.75, "learning_rate": 0.0001592773620093804, "loss": 1.8408, "step": 3678 }, { "epoch": 0.75, "learning_rate": 0.00015903687962109437, "loss": 1.8525, "step": 3679 }, { "epoch": 0.75, "learning_rate": 0.00015879654457701846, "loss": 2.0322, "step": 3680 }, { "epoch": 0.75, "learning_rate": 0.00015855635698101177, "loss": 1.8496, "step": 3681 }, { "epoch": 0.75, "learning_rate": 0.00015831631693686904, "loss": 1.8828, "step": 3682 }, { "epoch": 0.75, "learning_rate": 0.00015807642454832133, "loss": 1.792, "step": 3683 }, { "epoch": 0.75, "learning_rate": 0.00015783667991903634, "loss": 1.8789, "step": 3684 }, { "epoch": 0.75, "learning_rate": 0.0001575970831526172, "loss": 1.9746, "step": 3685 }, { "epoch": 0.75, "learning_rate": 0.00015735763435260382, "loss": 1.9365, "step": 3686 }, { "epoch": 0.75, "learning_rate": 0.00015711833362247202, "loss": 1.7979, "step": 3687 }, { "epoch": 0.75, "learning_rate": 0.00015687918106563326, "loss": 1.9541, "step": 3688 }, { "epoch": 0.75, "learning_rate": 0.00015664017678543519, "loss": 1.79, "step": 3689 }, { "epoch": 0.75, "learning_rate": 0.00015640132088516162, "loss": 1.9336, "step": 3690 }, { "epoch": 0.75, "learning_rate": 0.0001561626134680319, "loss": 1.9307, "step": 3691 }, { "epoch": 0.75, "learning_rate": 0.00015592405463720117, "loss": 1.8896, "step": 3692 }, { "epoch": 0.75, "learning_rate": 0.00015568564449576066, "loss": 1.9336, "step": 3693 }, { "epoch": 0.75, "learning_rate": 0.00015544738314673745, "loss": 1.9814, "step": 3694 }, { "epoch": 0.75, "learning_rate": 0.0001552092706930937, "loss": 1.9229, "step": 3695 }, { "epoch": 0.75, "learning_rate": 0.00015497130723772797, "loss": 1.7764, "step": 3696 }, { "epoch": 0.75, "learning_rate": 0.0001547334928834739, "loss": 1.875, "step": 3697 }, { "epoch": 0.75, "learning_rate": 0.00015449582773310067, "loss": 1.8115, "step": 3698 }, { "epoch": 0.75, "learning_rate": 0.00015425831188931356, "loss": 1.7969, "step": 3699 }, { "epoch": 0.75, "learning_rate": 0.00015402094545475254, "loss": 1.9326, "step": 3700 }, { "epoch": 0.75, "learning_rate": 0.00015378372853199385, "loss": 1.918, "step": 3701 }, { "epoch": 0.75, "learning_rate": 0.0001535466612235484, "loss": 1.9355, "step": 3702 }, { "epoch": 0.75, "learning_rate": 0.0001533097436318629, "loss": 1.8447, "step": 3703 }, { "epoch": 0.75, "learning_rate": 0.00015307297585931916, "loss": 1.8379, "step": 3704 }, { "epoch": 0.75, "learning_rate": 0.00015283635800823414, "loss": 1.9443, "step": 3705 }, { "epoch": 0.75, "learning_rate": 0.0001525998901808604, "loss": 1.8711, "step": 3706 }, { "epoch": 0.75, "learning_rate": 0.0001523635724793852, "loss": 1.9375, "step": 3707 }, { "epoch": 0.75, "learning_rate": 0.00015212740500593125, "loss": 1.9932, "step": 3708 }, { "epoch": 0.75, "learning_rate": 0.00015189138786255642, "loss": 1.8789, "step": 3709 }, { "epoch": 0.75, "learning_rate": 0.00015165552115125325, "loss": 1.9785, "step": 3710 }, { "epoch": 0.75, "learning_rate": 0.00015141980497394936, "loss": 1.8301, "step": 3711 }, { "epoch": 0.75, "learning_rate": 0.0001511842394325077, "loss": 1.9268, "step": 3712 }, { "epoch": 0.75, "learning_rate": 0.00015094882462872555, "loss": 1.9434, "step": 3713 }, { "epoch": 0.75, "learning_rate": 0.0001507135606643355, "loss": 2.0225, "step": 3714 }, { "epoch": 0.75, "learning_rate": 0.000150478447641005, "loss": 1.9561, "step": 3715 }, { "epoch": 0.75, "learning_rate": 0.0001502434856603358, "loss": 1.8193, "step": 3716 }, { "epoch": 0.75, "learning_rate": 0.00015000867482386476, "loss": 1.8555, "step": 3717 }, { "epoch": 0.75, "learning_rate": 0.0001497740152330631, "loss": 1.8408, "step": 3718 }, { "epoch": 0.75, "learning_rate": 0.00014953950698933722, "loss": 1.8486, "step": 3719 }, { "epoch": 0.76, "learning_rate": 0.0001493051501940275, "loss": 1.8555, "step": 3720 }, { "epoch": 0.76, "learning_rate": 0.00014907094494840927, "loss": 1.8984, "step": 3721 }, { "epoch": 0.76, "learning_rate": 0.00014883689135369255, "loss": 1.8291, "step": 3722 }, { "epoch": 0.76, "learning_rate": 0.00014860298951102126, "loss": 1.8311, "step": 3723 }, { "epoch": 0.76, "learning_rate": 0.00014836923952147386, "loss": 1.9053, "step": 3724 }, { "epoch": 0.76, "learning_rate": 0.00014813564148606374, "loss": 1.8896, "step": 3725 }, { "epoch": 0.76, "learning_rate": 0.00014790219550573798, "loss": 1.7988, "step": 3726 }, { "epoch": 0.76, "learning_rate": 0.00014766890168137813, "loss": 1.8301, "step": 3727 }, { "epoch": 0.76, "learning_rate": 0.00014743576011380029, "loss": 1.9062, "step": 3728 }, { "epoch": 0.76, "learning_rate": 0.0001472027709037545, "loss": 1.9463, "step": 3729 }, { "epoch": 0.76, "learning_rate": 0.00014696993415192484, "loss": 1.8555, "step": 3730 }, { "epoch": 0.76, "learning_rate": 0.00014673724995892983, "loss": 1.8027, "step": 3731 }, { "epoch": 0.76, "learning_rate": 0.00014650471842532194, "loss": 1.7715, "step": 3732 }, { "epoch": 0.76, "learning_rate": 0.00014627233965158743, "loss": 1.9746, "step": 3733 }, { "epoch": 0.76, "learning_rate": 0.00014604011373814667, "loss": 1.9297, "step": 3734 }, { "epoch": 0.76, "learning_rate": 0.00014580804078535424, "loss": 1.9932, "step": 3735 }, { "epoch": 0.76, "learning_rate": 0.00014557612089349854, "loss": 1.8701, "step": 3736 }, { "epoch": 0.76, "learning_rate": 0.00014534435416280135, "loss": 1.8857, "step": 3737 }, { "epoch": 0.76, "learning_rate": 0.00014511274069341896, "loss": 1.958, "step": 3738 }, { "epoch": 0.76, "learning_rate": 0.00014488128058544097, "loss": 1.8643, "step": 3739 }, { "epoch": 0.76, "learning_rate": 0.00014464997393889067, "loss": 1.9141, "step": 3740 }, { "epoch": 0.76, "learning_rate": 0.00014441882085372532, "loss": 1.8926, "step": 3741 }, { "epoch": 0.76, "learning_rate": 0.00014418782142983587, "loss": 1.8945, "step": 3742 }, { "epoch": 0.76, "learning_rate": 0.00014395697576704636, "loss": 1.9424, "step": 3743 }, { "epoch": 0.76, "learning_rate": 0.000143726283965115, "loss": 1.9648, "step": 3744 }, { "epoch": 0.76, "learning_rate": 0.0001434957461237331, "loss": 1.8691, "step": 3745 }, { "epoch": 0.76, "learning_rate": 0.0001432653623425254, "loss": 1.8535, "step": 3746 }, { "epoch": 0.76, "learning_rate": 0.00014303513272105056, "loss": 1.8428, "step": 3747 }, { "epoch": 0.76, "learning_rate": 0.0001428050573587999, "loss": 1.8857, "step": 3748 }, { "epoch": 0.76, "learning_rate": 0.00014257513635519865, "loss": 1.793, "step": 3749 }, { "epoch": 0.76, "learning_rate": 0.00014234536980960532, "loss": 1.874, "step": 3750 }, { "epoch": 0.76, "learning_rate": 0.0001421157578213113, "loss": 1.8867, "step": 3751 }, { "epoch": 0.76, "learning_rate": 0.00014188630048954148, "loss": 1.918, "step": 3752 }, { "epoch": 0.76, "learning_rate": 0.0001416569979134536, "loss": 1.8623, "step": 3753 }, { "epoch": 0.76, "learning_rate": 0.00014142785019213906, "loss": 1.8721, "step": 3754 }, { "epoch": 0.76, "learning_rate": 0.00014119885742462169, "loss": 1.9316, "step": 3755 }, { "epoch": 0.76, "learning_rate": 0.00014097001970985896, "loss": 1.9297, "step": 3756 }, { "epoch": 0.76, "learning_rate": 0.00014074133714674113, "loss": 1.9814, "step": 3757 }, { "epoch": 0.76, "learning_rate": 0.00014051280983409125, "loss": 1.792, "step": 3758 }, { "epoch": 0.76, "learning_rate": 0.0001402844378706652, "loss": 1.8926, "step": 3759 }, { "epoch": 0.76, "learning_rate": 0.00014005622135515223, "loss": 1.9629, "step": 3760 }, { "epoch": 0.76, "learning_rate": 0.00013982816038617398, "loss": 1.918, "step": 3761 }, { "epoch": 0.76, "learning_rate": 0.0001396002550622848, "loss": 1.8369, "step": 3762 }, { "epoch": 0.76, "learning_rate": 0.0001393725054819721, "loss": 1.8379, "step": 3763 }, { "epoch": 0.76, "learning_rate": 0.00013914491174365606, "loss": 1.8115, "step": 3764 }, { "epoch": 0.76, "learning_rate": 0.000138917473945689, "loss": 1.9082, "step": 3765 }, { "epoch": 0.76, "learning_rate": 0.00013869019218635642, "loss": 1.9219, "step": 3766 }, { "epoch": 0.76, "learning_rate": 0.00013846306656387604, "loss": 1.9766, "step": 3767 }, { "epoch": 0.76, "learning_rate": 0.0001382360971763982, "loss": 1.9062, "step": 3768 }, { "epoch": 0.76, "learning_rate": 0.00013800928412200546, "loss": 1.8721, "step": 3769 }, { "epoch": 0.77, "learning_rate": 0.00013778262749871334, "loss": 2.0176, "step": 3770 }, { "epoch": 0.77, "learning_rate": 0.00013755612740446965, "loss": 1.9453, "step": 3771 }, { "epoch": 0.77, "learning_rate": 0.00013732978393715407, "loss": 1.918, "step": 3772 }, { "epoch": 0.77, "learning_rate": 0.00013710359719457926, "loss": 1.7227, "step": 3773 }, { "epoch": 0.77, "learning_rate": 0.00013687756727448968, "loss": 1.9131, "step": 3774 }, { "epoch": 0.77, "learning_rate": 0.00013665169427456203, "loss": 1.8555, "step": 3775 }, { "epoch": 0.77, "learning_rate": 0.00013642597829240544, "loss": 1.8633, "step": 3776 }, { "epoch": 0.77, "learning_rate": 0.00013620041942556126, "loss": 1.9824, "step": 3777 }, { "epoch": 0.77, "learning_rate": 0.00013597501777150252, "loss": 1.9609, "step": 3778 }, { "epoch": 0.77, "learning_rate": 0.0001357497734276348, "loss": 1.8516, "step": 3779 }, { "epoch": 0.77, "learning_rate": 0.0001355246864912953, "loss": 1.8857, "step": 3780 }, { "epoch": 0.77, "learning_rate": 0.00013529975705975329, "loss": 1.876, "step": 3781 }, { "epoch": 0.77, "learning_rate": 0.00013507498523021022, "loss": 1.8291, "step": 3782 }, { "epoch": 0.77, "learning_rate": 0.00013485037109979908, "loss": 1.8945, "step": 3783 }, { "epoch": 0.77, "learning_rate": 0.00013462591476558512, "loss": 1.9043, "step": 3784 }, { "epoch": 0.77, "learning_rate": 0.00013440161632456482, "loss": 1.8809, "step": 3785 }, { "epoch": 0.77, "learning_rate": 0.00013417747587366719, "loss": 1.8682, "step": 3786 }, { "epoch": 0.77, "learning_rate": 0.0001339534935097524, "loss": 1.7871, "step": 3787 }, { "epoch": 0.77, "learning_rate": 0.00013372966932961218, "loss": 1.8398, "step": 3788 }, { "epoch": 0.77, "learning_rate": 0.00013350600342997061, "loss": 1.9209, "step": 3789 }, { "epoch": 0.77, "learning_rate": 0.00013328249590748253, "loss": 1.8799, "step": 3790 }, { "epoch": 0.77, "learning_rate": 0.00013305914685873499, "loss": 1.8975, "step": 3791 }, { "epoch": 0.77, "learning_rate": 0.00013283595638024636, "loss": 1.9404, "step": 3792 }, { "epoch": 0.77, "learning_rate": 0.00013261292456846646, "loss": 1.915, "step": 3793 }, { "epoch": 0.77, "learning_rate": 0.00013239005151977618, "loss": 1.7666, "step": 3794 }, { "epoch": 0.77, "learning_rate": 0.00013216733733048859, "loss": 1.9648, "step": 3795 }, { "epoch": 0.77, "learning_rate": 0.00013194478209684746, "loss": 1.876, "step": 3796 }, { "epoch": 0.77, "learning_rate": 0.00013172238591502793, "loss": 1.9141, "step": 3797 }, { "epoch": 0.77, "learning_rate": 0.0001315001488811367, "loss": 1.9404, "step": 3798 }, { "epoch": 0.77, "learning_rate": 0.00013127807109121164, "loss": 1.7627, "step": 3799 }, { "epoch": 0.77, "learning_rate": 0.00013105615264122155, "loss": 1.8721, "step": 3800 }, { "epoch": 0.77, "learning_rate": 0.00013083439362706673, "loss": 1.9004, "step": 3801 }, { "epoch": 0.77, "learning_rate": 0.00013061279414457823, "loss": 1.916, "step": 3802 }, { "epoch": 0.77, "learning_rate": 0.0001303913542895182, "loss": 1.751, "step": 3803 }, { "epoch": 0.77, "learning_rate": 0.0001301700741575801, "loss": 1.8682, "step": 3804 }, { "epoch": 0.77, "learning_rate": 0.00012994895384438825, "loss": 1.9785, "step": 3805 }, { "epoch": 0.77, "learning_rate": 0.0001297279934454978, "loss": 1.9795, "step": 3806 }, { "epoch": 0.77, "learning_rate": 0.00012950719305639463, "loss": 1.877, "step": 3807 }, { "epoch": 0.77, "learning_rate": 0.00012928655277249596, "loss": 1.8438, "step": 3808 }, { "epoch": 0.77, "learning_rate": 0.00012906607268914949, "loss": 1.8506, "step": 3809 }, { "epoch": 0.77, "learning_rate": 0.00012884575290163353, "loss": 1.9336, "step": 3810 }, { "epoch": 0.77, "learning_rate": 0.00012862559350515745, "loss": 1.8916, "step": 3811 }, { "epoch": 0.77, "learning_rate": 0.00012840559459486144, "loss": 1.7725, "step": 3812 }, { "epoch": 0.77, "learning_rate": 0.00012818575626581574, "loss": 1.8467, "step": 3813 }, { "epoch": 0.77, "learning_rate": 0.00012796607861302183, "loss": 1.8594, "step": 3814 }, { "epoch": 0.77, "learning_rate": 0.00012774656173141142, "loss": 1.7812, "step": 3815 }, { "epoch": 0.77, "learning_rate": 0.00012752720571584648, "loss": 1.8877, "step": 3816 }, { "epoch": 0.77, "learning_rate": 0.00012730801066112026, "loss": 1.9336, "step": 3817 }, { "epoch": 0.77, "learning_rate": 0.00012708897666195552, "loss": 1.8291, "step": 3818 }, { "epoch": 0.78, "learning_rate": 0.00012687010381300624, "loss": 1.8662, "step": 3819 }, { "epoch": 0.78, "learning_rate": 0.00012665139220885614, "loss": 1.8887, "step": 3820 }, { "epoch": 0.78, "learning_rate": 0.0001264328419440197, "loss": 1.8125, "step": 3821 }, { "epoch": 0.78, "learning_rate": 0.0001262144531129415, "loss": 1.9746, "step": 3822 }, { "epoch": 0.78, "learning_rate": 0.00012599622580999605, "loss": 1.8721, "step": 3823 }, { "epoch": 0.78, "learning_rate": 0.00012577816012948872, "loss": 1.918, "step": 3824 }, { "epoch": 0.78, "learning_rate": 0.00012556025616565443, "loss": 1.8066, "step": 3825 }, { "epoch": 0.78, "learning_rate": 0.0001253425140126585, "loss": 1.8379, "step": 3826 }, { "epoch": 0.78, "learning_rate": 0.0001251249337645965, "loss": 1.9141, "step": 3827 }, { "epoch": 0.78, "learning_rate": 0.00012490751551549368, "loss": 1.9854, "step": 3828 }, { "epoch": 0.78, "learning_rate": 0.00012469025935930522, "loss": 1.9209, "step": 3829 }, { "epoch": 0.78, "learning_rate": 0.00012447316538991682, "loss": 1.7939, "step": 3830 }, { "epoch": 0.78, "learning_rate": 0.00012425623370114353, "loss": 1.9141, "step": 3831 }, { "epoch": 0.78, "learning_rate": 0.0001240394643867303, "loss": 1.8203, "step": 3832 }, { "epoch": 0.78, "learning_rate": 0.00012382285754035228, "loss": 1.8232, "step": 3833 }, { "epoch": 0.78, "learning_rate": 0.00012360641325561434, "loss": 1.8711, "step": 3834 }, { "epoch": 0.78, "learning_rate": 0.0001233901316260508, "loss": 1.9238, "step": 3835 }, { "epoch": 0.78, "learning_rate": 0.0001231740127451258, "loss": 1.8574, "step": 3836 }, { "epoch": 0.78, "learning_rate": 0.0001229580567062334, "loss": 1.8486, "step": 3837 }, { "epoch": 0.78, "learning_rate": 0.00012274226360269687, "loss": 1.9727, "step": 3838 }, { "epoch": 0.78, "learning_rate": 0.0001225266335277694, "loss": 1.873, "step": 3839 }, { "epoch": 0.78, "learning_rate": 0.0001223111665746338, "loss": 1.8428, "step": 3840 }, { "epoch": 0.78, "learning_rate": 0.00012209586283640206, "loss": 1.8643, "step": 3841 }, { "epoch": 0.78, "learning_rate": 0.0001218807224061157, "loss": 1.8848, "step": 3842 }, { "epoch": 0.78, "learning_rate": 0.00012166574537674602, "loss": 1.8662, "step": 3843 }, { "epoch": 0.78, "learning_rate": 0.00012145093184119326, "loss": 1.8604, "step": 3844 }, { "epoch": 0.78, "learning_rate": 0.00012123628189228713, "loss": 1.8711, "step": 3845 }, { "epoch": 0.78, "learning_rate": 0.00012102179562278682, "loss": 1.8125, "step": 3846 }, { "epoch": 0.78, "learning_rate": 0.00012080747312538082, "loss": 1.8682, "step": 3847 }, { "epoch": 0.78, "learning_rate": 0.00012059331449268645, "loss": 1.8457, "step": 3848 }, { "epoch": 0.78, "learning_rate": 0.00012037931981725076, "loss": 1.8545, "step": 3849 }, { "epoch": 0.78, "learning_rate": 0.00012016548919154946, "loss": 1.877, "step": 3850 }, { "epoch": 0.78, "learning_rate": 0.00011995182270798766, "loss": 1.792, "step": 3851 }, { "epoch": 0.78, "learning_rate": 0.00011973832045889927, "loss": 1.9238, "step": 3852 }, { "epoch": 0.78, "learning_rate": 0.00011952498253654753, "loss": 1.9111, "step": 3853 }, { "epoch": 0.78, "learning_rate": 0.00011931180903312467, "loss": 1.9414, "step": 3854 }, { "epoch": 0.78, "learning_rate": 0.00011909880004075152, "loss": 1.9238, "step": 3855 }, { "epoch": 0.78, "learning_rate": 0.00011888595565147836, "loss": 1.873, "step": 3856 }, { "epoch": 0.78, "learning_rate": 0.00011867327595728372, "loss": 1.8037, "step": 3857 }, { "epoch": 0.78, "learning_rate": 0.00011846076105007526, "loss": 1.7227, "step": 3858 }, { "epoch": 0.78, "learning_rate": 0.00011824841102168965, "loss": 1.9561, "step": 3859 }, { "epoch": 0.78, "learning_rate": 0.00011803622596389185, "loss": 1.8506, "step": 3860 }, { "epoch": 0.78, "learning_rate": 0.00011782420596837585, "loss": 1.8545, "step": 3861 }, { "epoch": 0.78, "learning_rate": 0.00011761235112676433, "loss": 1.918, "step": 3862 }, { "epoch": 0.78, "learning_rate": 0.00011740066153060847, "loss": 1.8252, "step": 3863 }, { "epoch": 0.78, "learning_rate": 0.0001171891372713878, "loss": 1.8936, "step": 3864 }, { "epoch": 0.78, "learning_rate": 0.00011697777844051105, "loss": 1.8086, "step": 3865 }, { "epoch": 0.78, "learning_rate": 0.00011676658512931477, "loss": 1.7822, "step": 3866 }, { "epoch": 0.78, "learning_rate": 0.00011655555742906437, "loss": 1.8945, "step": 3867 }, { "epoch": 0.79, "learning_rate": 0.00011634469543095383, "loss": 1.8047, "step": 3868 }, { "epoch": 0.79, "learning_rate": 0.00011613399922610512, "loss": 1.8691, "step": 3869 }, { "epoch": 0.79, "learning_rate": 0.00011592346890556876, "loss": 1.7744, "step": 3870 }, { "epoch": 0.79, "learning_rate": 0.00011571310456032341, "loss": 2.0449, "step": 3871 }, { "epoch": 0.79, "learning_rate": 0.00011550290628127653, "loss": 1.9395, "step": 3872 }, { "epoch": 0.79, "learning_rate": 0.00011529287415926309, "loss": 1.8545, "step": 3873 }, { "epoch": 0.79, "learning_rate": 0.00011508300828504681, "loss": 1.8486, "step": 3874 }, { "epoch": 0.79, "learning_rate": 0.00011487330874931951, "loss": 1.9023, "step": 3875 }, { "epoch": 0.79, "learning_rate": 0.00011466377564270086, "loss": 2.042, "step": 3876 }, { "epoch": 0.79, "learning_rate": 0.00011445440905573857, "loss": 1.8652, "step": 3877 }, { "epoch": 0.79, "learning_rate": 0.00011424520907890889, "loss": 1.7891, "step": 3878 }, { "epoch": 0.79, "learning_rate": 0.0001140361758026156, "loss": 1.8076, "step": 3879 }, { "epoch": 0.79, "learning_rate": 0.00011382730931719042, "loss": 1.8418, "step": 3880 }, { "epoch": 0.79, "learning_rate": 0.0001136186097128934, "loss": 1.8379, "step": 3881 }, { "epoch": 0.79, "learning_rate": 0.00011341007707991225, "loss": 1.8301, "step": 3882 }, { "epoch": 0.79, "learning_rate": 0.00011320171150836228, "loss": 1.874, "step": 3883 }, { "epoch": 0.79, "learning_rate": 0.00011299351308828709, "loss": 1.7539, "step": 3884 }, { "epoch": 0.79, "learning_rate": 0.00011278548190965777, "loss": 1.8848, "step": 3885 }, { "epoch": 0.79, "learning_rate": 0.00011257761806237299, "loss": 1.9297, "step": 3886 }, { "epoch": 0.79, "learning_rate": 0.00011236992163625932, "loss": 1.8662, "step": 3887 }, { "epoch": 0.79, "learning_rate": 0.00011216239272107093, "loss": 1.8281, "step": 3888 }, { "epoch": 0.79, "learning_rate": 0.00011195503140648983, "loss": 1.8604, "step": 3889 }, { "epoch": 0.79, "learning_rate": 0.0001117478377821251, "loss": 1.96, "step": 3890 }, { "epoch": 0.79, "learning_rate": 0.00011154081193751387, "loss": 1.8467, "step": 3891 }, { "epoch": 0.79, "learning_rate": 0.00011133395396212048, "loss": 1.7969, "step": 3892 }, { "epoch": 0.79, "learning_rate": 0.00011112726394533662, "loss": 1.9297, "step": 3893 }, { "epoch": 0.79, "learning_rate": 0.00011092074197648178, "loss": 1.9277, "step": 3894 }, { "epoch": 0.79, "learning_rate": 0.00011071438814480239, "loss": 1.9551, "step": 3895 }, { "epoch": 0.79, "learning_rate": 0.00011050820253947259, "loss": 1.8574, "step": 3896 }, { "epoch": 0.79, "learning_rate": 0.00011030218524959373, "loss": 1.917, "step": 3897 }, { "epoch": 0.79, "learning_rate": 0.00011009633636419431, "loss": 1.9639, "step": 3898 }, { "epoch": 0.79, "learning_rate": 0.00010989065597222996, "loss": 1.8936, "step": 3899 }, { "epoch": 0.79, "learning_rate": 0.0001096851441625839, "loss": 1.8594, "step": 3900 }, { "epoch": 0.79, "learning_rate": 0.00010947980102406597, "loss": 1.9004, "step": 3901 }, { "epoch": 0.79, "learning_rate": 0.00010927462664541377, "loss": 1.9502, "step": 3902 }, { "epoch": 0.79, "learning_rate": 0.00010906962111529118, "loss": 1.9209, "step": 3903 }, { "epoch": 0.79, "learning_rate": 0.00010886478452228987, "loss": 1.8438, "step": 3904 }, { "epoch": 0.79, "learning_rate": 0.00010866011695492812, "loss": 1.7139, "step": 3905 }, { "epoch": 0.79, "learning_rate": 0.00010845561850165093, "loss": 1.9111, "step": 3906 }, { "epoch": 0.79, "learning_rate": 0.00010825128925083089, "loss": 1.8584, "step": 3907 }, { "epoch": 0.79, "learning_rate": 0.00010804712929076676, "loss": 1.8184, "step": 3908 }, { "epoch": 0.79, "learning_rate": 0.00010784313870968465, "loss": 1.833, "step": 3909 }, { "epoch": 0.79, "learning_rate": 0.00010763931759573737, "loss": 1.9111, "step": 3910 }, { "epoch": 0.79, "learning_rate": 0.00010743566603700433, "loss": 1.9727, "step": 3911 }, { "epoch": 0.79, "learning_rate": 0.0001072321841214916, "loss": 1.8154, "step": 3912 }, { "epoch": 0.79, "learning_rate": 0.0001070288719371324, "loss": 1.8721, "step": 3913 }, { "epoch": 0.79, "learning_rate": 0.00010682572957178616, "loss": 1.8438, "step": 3914 }, { "epoch": 0.79, "learning_rate": 0.00010662275711323888, "loss": 1.876, "step": 3915 }, { "epoch": 0.79, "learning_rate": 0.00010641995464920345, "loss": 1.8232, "step": 3916 }, { "epoch": 0.8, "learning_rate": 0.00010621732226731939, "loss": 1.8984, "step": 3917 }, { "epoch": 0.8, "learning_rate": 0.00010601486005515216, "loss": 1.7236, "step": 3918 }, { "epoch": 0.8, "learning_rate": 0.00010581256810019435, "loss": 1.7373, "step": 3919 }, { "epoch": 0.8, "learning_rate": 0.00010561044648986445, "loss": 1.9219, "step": 3920 }, { "epoch": 0.8, "learning_rate": 0.00010540849531150754, "loss": 1.9453, "step": 3921 }, { "epoch": 0.8, "learning_rate": 0.00010520671465239496, "loss": 1.876, "step": 3922 }, { "epoch": 0.8, "learning_rate": 0.00010500510459972451, "loss": 1.9541, "step": 3923 }, { "epoch": 0.8, "learning_rate": 0.00010480366524062041, "loss": 1.8506, "step": 3924 }, { "epoch": 0.8, "learning_rate": 0.00010460239666213261, "loss": 1.8711, "step": 3925 }, { "epoch": 0.8, "learning_rate": 0.0001044012989512378, "loss": 1.8877, "step": 3926 }, { "epoch": 0.8, "learning_rate": 0.00010420037219483841, "loss": 1.9512, "step": 3927 }, { "epoch": 0.8, "learning_rate": 0.00010399961647976313, "loss": 1.7539, "step": 3928 }, { "epoch": 0.8, "learning_rate": 0.00010379903189276684, "loss": 1.8545, "step": 3929 }, { "epoch": 0.8, "learning_rate": 0.0001035986185205306, "loss": 1.8301, "step": 3930 }, { "epoch": 0.8, "learning_rate": 0.000103398376449661, "loss": 1.8291, "step": 3931 }, { "epoch": 0.8, "learning_rate": 0.0001031983057666911, "loss": 1.8955, "step": 3932 }, { "epoch": 0.8, "learning_rate": 0.00010299840655807957, "loss": 1.7949, "step": 3933 }, { "epoch": 0.8, "learning_rate": 0.000102798678910211, "loss": 1.9316, "step": 3934 }, { "epoch": 0.8, "learning_rate": 0.00010259912290939616, "loss": 1.918, "step": 3935 }, { "epoch": 0.8, "learning_rate": 0.00010239973864187118, "loss": 1.8428, "step": 3936 }, { "epoch": 0.8, "learning_rate": 0.00010220052619379844, "loss": 1.8574, "step": 3937 }, { "epoch": 0.8, "learning_rate": 0.00010200148565126566, "loss": 1.8076, "step": 3938 }, { "epoch": 0.8, "learning_rate": 0.00010180261710028665, "loss": 1.9551, "step": 3939 }, { "epoch": 0.8, "learning_rate": 0.00010160392062680058, "loss": 1.8838, "step": 3940 }, { "epoch": 0.8, "learning_rate": 0.00010140539631667228, "loss": 1.7568, "step": 3941 }, { "epoch": 0.8, "learning_rate": 0.00010120704425569249, "loss": 1.7656, "step": 3942 }, { "epoch": 0.8, "learning_rate": 0.00010100886452957714, "loss": 1.9424, "step": 3943 }, { "epoch": 0.8, "learning_rate": 0.00010081085722396788, "loss": 1.9512, "step": 3944 }, { "epoch": 0.8, "learning_rate": 0.00010061302242443204, "loss": 1.7568, "step": 3945 }, { "epoch": 0.8, "learning_rate": 0.00010041536021646203, "loss": 1.835, "step": 3946 }, { "epoch": 0.8, "learning_rate": 0.00010021787068547567, "loss": 1.8711, "step": 3947 }, { "epoch": 0.8, "learning_rate": 0.00010002055391681658, "loss": 1.9043, "step": 3948 }, { "epoch": 0.8, "learning_rate": 9.982340999575334e-05, "loss": 1.9629, "step": 3949 }, { "epoch": 0.8, "learning_rate": 9.962643900747992e-05, "loss": 1.9023, "step": 3950 }, { "epoch": 0.8, "learning_rate": 9.94296410371156e-05, "loss": 1.9395, "step": 3951 }, { "epoch": 0.8, "learning_rate": 9.923301616970509e-05, "loss": 1.8887, "step": 3952 }, { "epoch": 0.8, "learning_rate": 9.903656449021797e-05, "loss": 1.9482, "step": 3953 }, { "epoch": 0.8, "learning_rate": 9.884028608354895e-05, "loss": 1.8242, "step": 3954 }, { "epoch": 0.8, "learning_rate": 9.864418103451828e-05, "loss": 1.8271, "step": 3955 }, { "epoch": 0.8, "learning_rate": 9.844824942787072e-05, "loss": 1.8711, "step": 3956 }, { "epoch": 0.8, "learning_rate": 9.825249134827663e-05, "loss": 1.8936, "step": 3957 }, { "epoch": 0.8, "learning_rate": 9.805690688033114e-05, "loss": 1.8154, "step": 3958 }, { "epoch": 0.8, "learning_rate": 9.786149610855439e-05, "loss": 1.7695, "step": 3959 }, { "epoch": 0.8, "learning_rate": 9.766625911739113e-05, "loss": 1.8926, "step": 3960 }, { "epoch": 0.8, "learning_rate": 9.747119599121163e-05, "loss": 1.8779, "step": 3961 }, { "epoch": 0.8, "learning_rate": 9.72763068143106e-05, "loss": 1.8838, "step": 3962 }, { "epoch": 0.8, "learning_rate": 9.708159167090753e-05, "loss": 1.9326, "step": 3963 }, { "epoch": 0.8, "learning_rate": 9.688705064514702e-05, "loss": 1.8418, "step": 3964 }, { "epoch": 0.8, "learning_rate": 9.66926838210983e-05, "loss": 1.9795, "step": 3965 }, { "epoch": 0.8, "learning_rate": 9.649849128275512e-05, "loss": 1.9844, "step": 3966 }, { "epoch": 0.81, "learning_rate": 9.630447311403623e-05, "loss": 1.8594, "step": 3967 }, { "epoch": 0.81, "learning_rate": 9.61106293987849e-05, "loss": 1.834, "step": 3968 }, { "epoch": 0.81, "learning_rate": 9.591696022076868e-05, "loss": 1.8066, "step": 3969 }, { "epoch": 0.81, "learning_rate": 9.57234656636804e-05, "loss": 1.8965, "step": 3970 }, { "epoch": 0.81, "learning_rate": 9.553014581113667e-05, "loss": 1.8838, "step": 3971 }, { "epoch": 0.81, "learning_rate": 9.533700074667927e-05, "loss": 1.9121, "step": 3972 }, { "epoch": 0.81, "learning_rate": 9.514403055377385e-05, "loss": 1.8037, "step": 3973 }, { "epoch": 0.81, "learning_rate": 9.495123531581113e-05, "loss": 1.9482, "step": 3974 }, { "epoch": 0.81, "learning_rate": 9.475861511610562e-05, "loss": 1.8545, "step": 3975 }, { "epoch": 0.81, "learning_rate": 9.456617003789642e-05, "loss": 1.8105, "step": 3976 }, { "epoch": 0.81, "learning_rate": 9.43739001643471e-05, "loss": 1.9395, "step": 3977 }, { "epoch": 0.81, "learning_rate": 9.418180557854528e-05, "loss": 1.8555, "step": 3978 }, { "epoch": 0.81, "learning_rate": 9.398988636350303e-05, "loss": 1.8672, "step": 3979 }, { "epoch": 0.81, "learning_rate": 9.379814260215663e-05, "loss": 1.9043, "step": 3980 }, { "epoch": 0.81, "learning_rate": 9.360657437736636e-05, "loss": 1.9199, "step": 3981 }, { "epoch": 0.81, "learning_rate": 9.34151817719166e-05, "loss": 1.8154, "step": 3982 }, { "epoch": 0.81, "learning_rate": 9.322396486851626e-05, "loss": 1.9678, "step": 3983 }, { "epoch": 0.81, "learning_rate": 9.303292374979794e-05, "loss": 1.9141, "step": 3984 }, { "epoch": 0.81, "learning_rate": 9.284205849831817e-05, "loss": 1.8789, "step": 3985 }, { "epoch": 0.81, "learning_rate": 9.265136919655787e-05, "loss": 1.8936, "step": 3986 }, { "epoch": 0.81, "learning_rate": 9.246085592692183e-05, "loss": 1.9062, "step": 3987 }, { "epoch": 0.81, "learning_rate": 9.227051877173859e-05, "loss": 1.8818, "step": 3988 }, { "epoch": 0.81, "learning_rate": 9.208035781326057e-05, "loss": 1.9062, "step": 3989 }, { "epoch": 0.81, "learning_rate": 9.189037313366439e-05, "loss": 1.8438, "step": 3990 }, { "epoch": 0.81, "learning_rate": 9.170056481504996e-05, "loss": 1.9062, "step": 3991 }, { "epoch": 0.81, "learning_rate": 9.151093293944141e-05, "loss": 1.7617, "step": 3992 }, { "epoch": 0.81, "learning_rate": 9.132147758878668e-05, "loss": 1.792, "step": 3993 }, { "epoch": 0.81, "learning_rate": 9.113219884495699e-05, "loss": 1.8252, "step": 3994 }, { "epoch": 0.81, "learning_rate": 9.09430967897475e-05, "loss": 1.9346, "step": 3995 }, { "epoch": 0.81, "learning_rate": 9.075417150487713e-05, "loss": 1.8408, "step": 3996 }, { "epoch": 0.81, "learning_rate": 9.056542307198823e-05, "loss": 1.874, "step": 3997 }, { "epoch": 0.81, "learning_rate": 9.037685157264658e-05, "loss": 1.9814, "step": 3998 }, { "epoch": 0.81, "learning_rate": 9.018845708834189e-05, "loss": 1.8232, "step": 3999 }, { "epoch": 0.81, "learning_rate": 9.000023970048726e-05, "loss": 1.8701, "step": 4000 }, { "epoch": 0.81, "learning_rate": 8.981219949041891e-05, "loss": 1.7861, "step": 4001 }, { "epoch": 0.81, "learning_rate": 8.962433653939705e-05, "loss": 1.8701, "step": 4002 }, { "epoch": 0.81, "learning_rate": 8.943665092860487e-05, "loss": 1.8574, "step": 4003 }, { "epoch": 0.81, "learning_rate": 8.924914273914903e-05, "loss": 1.8633, "step": 4004 }, { "epoch": 0.81, "learning_rate": 8.906181205205943e-05, "loss": 1.9053, "step": 4005 }, { "epoch": 0.81, "learning_rate": 8.88746589482895e-05, "loss": 1.9092, "step": 4006 }, { "epoch": 0.81, "learning_rate": 8.868768350871592e-05, "loss": 1.7559, "step": 4007 }, { "epoch": 0.81, "learning_rate": 8.850088581413829e-05, "loss": 1.959, "step": 4008 }, { "epoch": 0.81, "learning_rate": 8.831426594527975e-05, "loss": 1.8926, "step": 4009 }, { "epoch": 0.81, "learning_rate": 8.81278239827864e-05, "loss": 1.8594, "step": 4010 }, { "epoch": 0.81, "learning_rate": 8.794156000722731e-05, "loss": 1.9141, "step": 4011 }, { "epoch": 0.81, "learning_rate": 8.775547409909501e-05, "loss": 1.8037, "step": 4012 }, { "epoch": 0.81, "learning_rate": 8.756956633880475e-05, "loss": 1.9043, "step": 4013 }, { "epoch": 0.81, "learning_rate": 8.738383680669504e-05, "loss": 1.9648, "step": 4014 }, { "epoch": 0.81, "learning_rate": 8.719828558302733e-05, "loss": 1.8701, "step": 4015 }, { "epoch": 0.82, "learning_rate": 8.701291274798584e-05, "loss": 1.9277, "step": 4016 }, { "epoch": 0.82, "learning_rate": 8.682771838167774e-05, "loss": 1.9893, "step": 4017 }, { "epoch": 0.82, "learning_rate": 8.664270256413331e-05, "loss": 1.8428, "step": 4018 }, { "epoch": 0.82, "learning_rate": 8.645786537530526e-05, "loss": 1.8477, "step": 4019 }, { "epoch": 0.82, "learning_rate": 8.627320689506956e-05, "loss": 1.8633, "step": 4020 }, { "epoch": 0.82, "learning_rate": 8.608872720322481e-05, "loss": 1.8008, "step": 4021 }, { "epoch": 0.82, "learning_rate": 8.59044263794922e-05, "loss": 1.9248, "step": 4022 }, { "epoch": 0.82, "learning_rate": 8.572030450351565e-05, "loss": 1.8096, "step": 4023 }, { "epoch": 0.82, "learning_rate": 8.553636165486173e-05, "loss": 1.8828, "step": 4024 }, { "epoch": 0.82, "learning_rate": 8.535259791301996e-05, "loss": 1.9131, "step": 4025 }, { "epoch": 0.82, "learning_rate": 8.516901335740196e-05, "loss": 1.915, "step": 4026 }, { "epoch": 0.82, "learning_rate": 8.498560806734229e-05, "loss": 1.8135, "step": 4027 }, { "epoch": 0.82, "learning_rate": 8.480238212209812e-05, "loss": 1.7637, "step": 4028 }, { "epoch": 0.82, "learning_rate": 8.461933560084878e-05, "loss": 1.7793, "step": 4029 }, { "epoch": 0.82, "learning_rate": 8.443646858269604e-05, "loss": 1.9092, "step": 4030 }, { "epoch": 0.82, "learning_rate": 8.425378114666455e-05, "loss": 1.7979, "step": 4031 }, { "epoch": 0.82, "learning_rate": 8.407127337170096e-05, "loss": 1.8691, "step": 4032 }, { "epoch": 0.82, "learning_rate": 8.388894533667429e-05, "loss": 1.9287, "step": 4033 }, { "epoch": 0.82, "learning_rate": 8.37067971203761e-05, "loss": 1.9863, "step": 4034 }, { "epoch": 0.82, "learning_rate": 8.352482880152023e-05, "loss": 1.9609, "step": 4035 }, { "epoch": 0.82, "learning_rate": 8.334304045874247e-05, "loss": 1.9248, "step": 4036 }, { "epoch": 0.82, "learning_rate": 8.316143217060129e-05, "loss": 1.9561, "step": 4037 }, { "epoch": 0.82, "learning_rate": 8.298000401557704e-05, "loss": 1.748, "step": 4038 }, { "epoch": 0.82, "learning_rate": 8.279875607207226e-05, "loss": 1.8818, "step": 4039 }, { "epoch": 0.82, "learning_rate": 8.26176884184115e-05, "loss": 1.8584, "step": 4040 }, { "epoch": 0.82, "learning_rate": 8.243680113284169e-05, "loss": 1.8174, "step": 4041 }, { "epoch": 0.82, "learning_rate": 8.225609429353187e-05, "loss": 1.9326, "step": 4042 }, { "epoch": 0.82, "learning_rate": 8.207556797857252e-05, "loss": 1.8936, "step": 4043 }, { "epoch": 0.82, "learning_rate": 8.189522226597689e-05, "loss": 1.9111, "step": 4044 }, { "epoch": 0.82, "learning_rate": 8.171505723367961e-05, "loss": 1.8311, "step": 4045 }, { "epoch": 0.82, "learning_rate": 8.153507295953727e-05, "loss": 1.8555, "step": 4046 }, { "epoch": 0.82, "learning_rate": 8.135526952132877e-05, "loss": 1.9023, "step": 4047 }, { "epoch": 0.82, "learning_rate": 8.117564699675434e-05, "loss": 1.9326, "step": 4048 }, { "epoch": 0.82, "learning_rate": 8.099620546343634e-05, "loss": 1.9004, "step": 4049 }, { "epoch": 0.82, "learning_rate": 8.081694499891901e-05, "loss": 1.8887, "step": 4050 }, { "epoch": 0.82, "learning_rate": 8.063786568066805e-05, "loss": 1.8945, "step": 4051 }, { "epoch": 0.82, "learning_rate": 8.045896758607091e-05, "loss": 1.8896, "step": 4052 }, { "epoch": 0.82, "learning_rate": 8.028025079243705e-05, "loss": 1.874, "step": 4053 }, { "epoch": 0.82, "learning_rate": 8.010171537699713e-05, "loss": 1.8564, "step": 4054 }, { "epoch": 0.82, "learning_rate": 7.992336141690393e-05, "loss": 1.874, "step": 4055 }, { "epoch": 0.82, "learning_rate": 7.974518898923117e-05, "loss": 1.9141, "step": 4056 }, { "epoch": 0.82, "learning_rate": 7.95671981709749e-05, "loss": 1.9346, "step": 4057 }, { "epoch": 0.82, "learning_rate": 7.938938903905207e-05, "loss": 1.8848, "step": 4058 }, { "epoch": 0.82, "learning_rate": 7.921176167030125e-05, "loss": 1.874, "step": 4059 }, { "epoch": 0.82, "learning_rate": 7.903431614148276e-05, "loss": 1.8008, "step": 4060 }, { "epoch": 0.82, "learning_rate": 7.885705252927788e-05, "loss": 1.8457, "step": 4061 }, { "epoch": 0.82, "learning_rate": 7.867997091028966e-05, "loss": 1.8877, "step": 4062 }, { "epoch": 0.82, "learning_rate": 7.850307136104246e-05, "loss": 1.8193, "step": 4063 }, { "epoch": 0.82, "learning_rate": 7.832635395798176e-05, "loss": 1.8809, "step": 4064 }, { "epoch": 0.83, "learning_rate": 7.814981877747434e-05, "loss": 1.835, "step": 4065 }, { "epoch": 0.83, "learning_rate": 7.79734658958085e-05, "loss": 1.8135, "step": 4066 }, { "epoch": 0.83, "learning_rate": 7.779729538919351e-05, "loss": 1.8438, "step": 4067 }, { "epoch": 0.83, "learning_rate": 7.762130733375971e-05, "loss": 1.9443, "step": 4068 }, { "epoch": 0.83, "learning_rate": 7.744550180555903e-05, "loss": 1.8896, "step": 4069 }, { "epoch": 0.83, "learning_rate": 7.726987888056431e-05, "loss": 1.8906, "step": 4070 }, { "epoch": 0.83, "learning_rate": 7.709443863466925e-05, "loss": 1.8291, "step": 4071 }, { "epoch": 0.83, "learning_rate": 7.691918114368906e-05, "loss": 1.9512, "step": 4072 }, { "epoch": 0.83, "learning_rate": 7.674410648335956e-05, "loss": 1.8379, "step": 4073 }, { "epoch": 0.83, "learning_rate": 7.65692147293377e-05, "loss": 1.8818, "step": 4074 }, { "epoch": 0.83, "learning_rate": 7.639450595720137e-05, "loss": 1.9053, "step": 4075 }, { "epoch": 0.83, "learning_rate": 7.621998024244948e-05, "loss": 1.8799, "step": 4076 }, { "epoch": 0.83, "learning_rate": 7.604563766050199e-05, "loss": 1.9473, "step": 4077 }, { "epoch": 0.83, "learning_rate": 7.587147828669915e-05, "loss": 1.9619, "step": 4078 }, { "epoch": 0.83, "learning_rate": 7.569750219630272e-05, "loss": 1.877, "step": 4079 }, { "epoch": 0.83, "learning_rate": 7.552370946449483e-05, "loss": 1.7861, "step": 4080 }, { "epoch": 0.83, "learning_rate": 7.535010016637827e-05, "loss": 1.8369, "step": 4081 }, { "epoch": 0.83, "learning_rate": 7.517667437697695e-05, "loss": 1.8711, "step": 4082 }, { "epoch": 0.83, "learning_rate": 7.500343217123546e-05, "loss": 1.915, "step": 4083 }, { "epoch": 0.83, "learning_rate": 7.483037362401856e-05, "loss": 1.8945, "step": 4084 }, { "epoch": 0.83, "learning_rate": 7.46574988101123e-05, "loss": 1.8018, "step": 4085 }, { "epoch": 0.83, "learning_rate": 7.448480780422285e-05, "loss": 1.8047, "step": 4086 }, { "epoch": 0.83, "learning_rate": 7.4312300680977e-05, "loss": 1.8682, "step": 4087 }, { "epoch": 0.83, "learning_rate": 7.41399775149224e-05, "loss": 1.875, "step": 4088 }, { "epoch": 0.83, "learning_rate": 7.396783838052679e-05, "loss": 1.9199, "step": 4089 }, { "epoch": 0.83, "learning_rate": 7.379588335217874e-05, "loss": 1.8008, "step": 4090 }, { "epoch": 0.83, "learning_rate": 7.3624112504187e-05, "loss": 1.8408, "step": 4091 }, { "epoch": 0.83, "learning_rate": 7.345252591078088e-05, "loss": 1.8408, "step": 4092 }, { "epoch": 0.83, "learning_rate": 7.328112364611012e-05, "loss": 1.834, "step": 4093 }, { "epoch": 0.83, "learning_rate": 7.310990578424437e-05, "loss": 1.8223, "step": 4094 }, { "epoch": 0.83, "learning_rate": 7.293887239917429e-05, "loss": 1.8359, "step": 4095 }, { "epoch": 0.83, "learning_rate": 7.276802356481016e-05, "loss": 1.8643, "step": 4096 }, { "epoch": 0.83, "learning_rate": 7.259735935498295e-05, "loss": 1.8447, "step": 4097 }, { "epoch": 0.83, "learning_rate": 7.242687984344382e-05, "loss": 1.8838, "step": 4098 }, { "epoch": 0.83, "learning_rate": 7.225658510386385e-05, "loss": 1.8438, "step": 4099 }, { "epoch": 0.83, "learning_rate": 7.208647520983425e-05, "loss": 1.8477, "step": 4100 }, { "epoch": 0.83, "learning_rate": 7.191655023486682e-05, "loss": 1.8389, "step": 4101 }, { "epoch": 0.83, "learning_rate": 7.174681025239299e-05, "loss": 1.8623, "step": 4102 }, { "epoch": 0.83, "learning_rate": 7.157725533576426e-05, "loss": 1.8174, "step": 4103 }, { "epoch": 0.83, "learning_rate": 7.14078855582525e-05, "loss": 1.9238, "step": 4104 }, { "epoch": 0.83, "learning_rate": 7.123870099304941e-05, "loss": 1.833, "step": 4105 }, { "epoch": 0.83, "learning_rate": 7.106970171326649e-05, "loss": 1.9785, "step": 4106 }, { "epoch": 0.83, "learning_rate": 7.090088779193516e-05, "loss": 1.7646, "step": 4107 }, { "epoch": 0.83, "learning_rate": 7.073225930200722e-05, "loss": 1.8799, "step": 4108 }, { "epoch": 0.83, "learning_rate": 7.05638163163536e-05, "loss": 1.9141, "step": 4109 }, { "epoch": 0.83, "learning_rate": 7.039555890776578e-05, "loss": 1.9014, "step": 4110 }, { "epoch": 0.83, "learning_rate": 7.022748714895444e-05, "loss": 1.8662, "step": 4111 }, { "epoch": 0.83, "learning_rate": 7.00596011125506e-05, "loss": 1.8525, "step": 4112 }, { "epoch": 0.83, "learning_rate": 6.989190087110442e-05, "loss": 1.8701, "step": 4113 }, { "epoch": 0.83, "learning_rate": 6.97243864970864e-05, "loss": 1.8867, "step": 4114 }, { "epoch": 0.84, "learning_rate": 6.955705806288626e-05, "loss": 1.8135, "step": 4115 }, { "epoch": 0.84, "learning_rate": 6.938991564081332e-05, "loss": 1.9307, "step": 4116 }, { "epoch": 0.84, "learning_rate": 6.922295930309692e-05, "loss": 1.8018, "step": 4117 }, { "epoch": 0.84, "learning_rate": 6.905618912188582e-05, "loss": 1.8262, "step": 4118 }, { "epoch": 0.84, "learning_rate": 6.888960516924808e-05, "loss": 1.8467, "step": 4119 }, { "epoch": 0.84, "learning_rate": 6.872320751717175e-05, "loss": 1.749, "step": 4120 }, { "epoch": 0.84, "learning_rate": 6.855699623756395e-05, "loss": 1.8877, "step": 4121 }, { "epoch": 0.84, "learning_rate": 6.839097140225136e-05, "loss": 1.8232, "step": 4122 }, { "epoch": 0.84, "learning_rate": 6.822513308298028e-05, "loss": 1.7822, "step": 4123 }, { "epoch": 0.84, "learning_rate": 6.805948135141616e-05, "loss": 1.7988, "step": 4124 }, { "epoch": 0.84, "learning_rate": 6.78940162791441e-05, "loss": 1.8271, "step": 4125 }, { "epoch": 0.84, "learning_rate": 6.772873793766815e-05, "loss": 1.9014, "step": 4126 }, { "epoch": 0.84, "learning_rate": 6.756364639841218e-05, "loss": 1.834, "step": 4127 }, { "epoch": 0.84, "learning_rate": 6.739874173271882e-05, "loss": 1.8965, "step": 4128 }, { "epoch": 0.84, "learning_rate": 6.723402401185014e-05, "loss": 1.917, "step": 4129 }, { "epoch": 0.84, "learning_rate": 6.70694933069877e-05, "loss": 1.8809, "step": 4130 }, { "epoch": 0.84, "learning_rate": 6.690514968923167e-05, "loss": 1.8945, "step": 4131 }, { "epoch": 0.84, "learning_rate": 6.674099322960186e-05, "loss": 1.7842, "step": 4132 }, { "epoch": 0.84, "learning_rate": 6.65770239990372e-05, "loss": 1.8174, "step": 4133 }, { "epoch": 0.84, "learning_rate": 6.641324206839539e-05, "loss": 1.9375, "step": 4134 }, { "epoch": 0.84, "learning_rate": 6.624964750845325e-05, "loss": 1.8594, "step": 4135 }, { "epoch": 0.84, "learning_rate": 6.608624038990696e-05, "loss": 1.9766, "step": 4136 }, { "epoch": 0.84, "learning_rate": 6.59230207833713e-05, "loss": 1.8135, "step": 4137 }, { "epoch": 0.84, "learning_rate": 6.575998875938016e-05, "loss": 1.8623, "step": 4138 }, { "epoch": 0.84, "learning_rate": 6.559714438838644e-05, "loss": 1.8613, "step": 4139 }, { "epoch": 0.84, "learning_rate": 6.5434487740762e-05, "loss": 1.874, "step": 4140 }, { "epoch": 0.84, "learning_rate": 6.527201888679746e-05, "loss": 1.916, "step": 4141 }, { "epoch": 0.84, "learning_rate": 6.51097378967021e-05, "loss": 1.8604, "step": 4142 }, { "epoch": 0.84, "learning_rate": 6.494764484060444e-05, "loss": 1.8682, "step": 4143 }, { "epoch": 0.84, "learning_rate": 6.478573978855146e-05, "loss": 1.8896, "step": 4144 }, { "epoch": 0.84, "learning_rate": 6.4624022810509e-05, "loss": 1.791, "step": 4145 }, { "epoch": 0.84, "learning_rate": 6.446249397636172e-05, "loss": 1.8818, "step": 4146 }, { "epoch": 0.84, "learning_rate": 6.430115335591291e-05, "loss": 1.8145, "step": 4147 }, { "epoch": 0.84, "learning_rate": 6.414000101888428e-05, "loss": 1.8506, "step": 4148 }, { "epoch": 0.84, "learning_rate": 6.397903703491664e-05, "loss": 1.8486, "step": 4149 }, { "epoch": 0.84, "learning_rate": 6.381826147356907e-05, "loss": 1.8438, "step": 4150 }, { "epoch": 0.84, "learning_rate": 6.365767440431914e-05, "loss": 1.877, "step": 4151 }, { "epoch": 0.84, "learning_rate": 6.349727589656329e-05, "loss": 1.8359, "step": 4152 }, { "epoch": 0.84, "learning_rate": 6.333706601961642e-05, "loss": 1.8066, "step": 4153 }, { "epoch": 0.84, "learning_rate": 6.317704484271164e-05, "loss": 1.873, "step": 4154 }, { "epoch": 0.84, "learning_rate": 6.301721243500092e-05, "loss": 1.8643, "step": 4155 }, { "epoch": 0.84, "learning_rate": 6.285756886555422e-05, "loss": 1.8594, "step": 4156 }, { "epoch": 0.84, "learning_rate": 6.269811420336019e-05, "loss": 1.7295, "step": 4157 }, { "epoch": 0.84, "learning_rate": 6.25388485173256e-05, "loss": 1.8164, "step": 4158 }, { "epoch": 0.84, "learning_rate": 6.237977187627586e-05, "loss": 1.917, "step": 4159 }, { "epoch": 0.84, "learning_rate": 6.222088434895462e-05, "loss": 1.8057, "step": 4160 }, { "epoch": 0.84, "learning_rate": 6.206218600402352e-05, "loss": 1.8994, "step": 4161 }, { "epoch": 0.84, "learning_rate": 6.190367691006288e-05, "loss": 1.8018, "step": 4162 }, { "epoch": 0.84, "learning_rate": 6.174535713557089e-05, "loss": 1.9492, "step": 4163 }, { "epoch": 0.85, "learning_rate": 6.1587226748964e-05, "loss": 1.9307, "step": 4164 }, { "epoch": 0.85, "learning_rate": 6.142928581857693e-05, "loss": 1.7812, "step": 4165 }, { "epoch": 0.85, "learning_rate": 6.127153441266236e-05, "loss": 1.9248, "step": 4166 }, { "epoch": 0.85, "learning_rate": 6.111397259939128e-05, "loss": 1.7568, "step": 4167 }, { "epoch": 0.85, "learning_rate": 6.095660044685275e-05, "loss": 1.9043, "step": 4168 }, { "epoch": 0.85, "learning_rate": 6.079941802305361e-05, "loss": 1.7744, "step": 4169 }, { "epoch": 0.85, "learning_rate": 6.064242539591874e-05, "loss": 1.8662, "step": 4170 }, { "epoch": 0.85, "learning_rate": 6.048562263329138e-05, "loss": 1.9658, "step": 4171 }, { "epoch": 0.85, "learning_rate": 6.032900980293221e-05, "loss": 1.9092, "step": 4172 }, { "epoch": 0.85, "learning_rate": 6.017258697252037e-05, "loss": 1.7451, "step": 4173 }, { "epoch": 0.85, "learning_rate": 6.001635420965235e-05, "loss": 1.7432, "step": 4174 }, { "epoch": 0.85, "learning_rate": 5.986031158184296e-05, "loss": 1.9326, "step": 4175 }, { "epoch": 0.85, "learning_rate": 5.970445915652456e-05, "loss": 1.7871, "step": 4176 }, { "epoch": 0.85, "learning_rate": 5.954879700104732e-05, "loss": 1.9434, "step": 4177 }, { "epoch": 0.85, "learning_rate": 5.939332518267943e-05, "loss": 1.918, "step": 4178 }, { "epoch": 0.85, "learning_rate": 5.9238043768606474e-05, "loss": 1.9062, "step": 4179 }, { "epoch": 0.85, "learning_rate": 5.908295282593207e-05, "loss": 1.7803, "step": 4180 }, { "epoch": 0.85, "learning_rate": 5.892805242167748e-05, "loss": 1.876, "step": 4181 }, { "epoch": 0.85, "learning_rate": 5.87733426227815e-05, "loss": 1.7881, "step": 4182 }, { "epoch": 0.85, "learning_rate": 5.8618823496100426e-05, "loss": 1.8643, "step": 4183 }, { "epoch": 0.85, "learning_rate": 5.8464495108408625e-05, "loss": 1.9014, "step": 4184 }, { "epoch": 0.85, "learning_rate": 5.831035752639763e-05, "loss": 1.9482, "step": 4185 }, { "epoch": 0.85, "learning_rate": 5.81564108166765e-05, "loss": 1.8564, "step": 4186 }, { "epoch": 0.85, "learning_rate": 5.800265504577201e-05, "loss": 1.8428, "step": 4187 }, { "epoch": 0.85, "learning_rate": 5.784909028012858e-05, "loss": 1.9805, "step": 4188 }, { "epoch": 0.85, "learning_rate": 5.769571658610761e-05, "loss": 1.8047, "step": 4189 }, { "epoch": 0.85, "learning_rate": 5.7542534029988436e-05, "loss": 1.8877, "step": 4190 }, { "epoch": 0.85, "learning_rate": 5.7389542677967365e-05, "loss": 1.8184, "step": 4191 }, { "epoch": 0.85, "learning_rate": 5.72367425961583e-05, "loss": 1.9238, "step": 4192 }, { "epoch": 0.85, "learning_rate": 5.708413385059241e-05, "loss": 1.9844, "step": 4193 }, { "epoch": 0.85, "learning_rate": 5.6931716507218236e-05, "loss": 1.8086, "step": 4194 }, { "epoch": 0.85, "learning_rate": 5.677949063190169e-05, "loss": 1.7568, "step": 4195 }, { "epoch": 0.85, "learning_rate": 5.66274562904257e-05, "loss": 1.9102, "step": 4196 }, { "epoch": 0.85, "learning_rate": 5.647561354849079e-05, "loss": 1.8711, "step": 4197 }, { "epoch": 0.85, "learning_rate": 5.632396247171428e-05, "loss": 1.8887, "step": 4198 }, { "epoch": 0.85, "learning_rate": 5.617250312563082e-05, "loss": 1.8418, "step": 4199 }, { "epoch": 0.85, "learning_rate": 5.602123557569239e-05, "loss": 1.9268, "step": 4200 }, { "epoch": 0.85, "learning_rate": 5.587015988726774e-05, "loss": 1.7549, "step": 4201 }, { "epoch": 0.85, "learning_rate": 5.571927612564298e-05, "loss": 1.9414, "step": 4202 }, { "epoch": 0.85, "learning_rate": 5.55685843560213e-05, "loss": 1.8135, "step": 4203 }, { "epoch": 0.85, "learning_rate": 5.541808464352277e-05, "loss": 1.9072, "step": 4204 }, { "epoch": 0.85, "learning_rate": 5.526777705318442e-05, "loss": 1.9746, "step": 4205 }, { "epoch": 0.85, "learning_rate": 5.511766164996046e-05, "loss": 1.96, "step": 4206 }, { "epoch": 0.85, "learning_rate": 5.496773849872183e-05, "loss": 1.8213, "step": 4207 }, { "epoch": 0.85, "learning_rate": 5.4818007664256656e-05, "loss": 1.8564, "step": 4208 }, { "epoch": 0.85, "learning_rate": 5.466846921126961e-05, "loss": 1.8516, "step": 4209 }, { "epoch": 0.85, "learning_rate": 5.451912320438263e-05, "loss": 1.8545, "step": 4210 }, { "epoch": 0.85, "learning_rate": 5.436996970813418e-05, "loss": 1.8076, "step": 4211 }, { "epoch": 0.85, "learning_rate": 5.42210087869795e-05, "loss": 1.8145, "step": 4212 }, { "epoch": 0.86, "learning_rate": 5.407224050529097e-05, "loss": 1.9375, "step": 4213 }, { "epoch": 0.86, "learning_rate": 5.392366492735723e-05, "loss": 1.8428, "step": 4214 }, { "epoch": 0.86, "learning_rate": 5.3775282117384086e-05, "loss": 1.9766, "step": 4215 }, { "epoch": 0.86, "learning_rate": 5.362709213949396e-05, "loss": 1.749, "step": 4216 }, { "epoch": 0.86, "learning_rate": 5.347909505772569e-05, "loss": 1.8721, "step": 4217 }, { "epoch": 0.86, "learning_rate": 5.3331290936034895e-05, "loss": 1.9082, "step": 4218 }, { "epoch": 0.86, "learning_rate": 5.318367983829392e-05, "loss": 1.8086, "step": 4219 }, { "epoch": 0.86, "learning_rate": 5.303626182829158e-05, "loss": 1.8711, "step": 4220 }, { "epoch": 0.86, "learning_rate": 5.288903696973313e-05, "loss": 1.9238, "step": 4221 }, { "epoch": 0.86, "learning_rate": 5.274200532624057e-05, "loss": 1.873, "step": 4222 }, { "epoch": 0.86, "learning_rate": 5.2595166961352527e-05, "loss": 1.7959, "step": 4223 }, { "epoch": 0.86, "learning_rate": 5.244852193852373e-05, "loss": 1.917, "step": 4224 }, { "epoch": 0.86, "learning_rate": 5.23020703211255e-05, "loss": 1.915, "step": 4225 }, { "epoch": 0.86, "learning_rate": 5.2155812172445695e-05, "loss": 1.8955, "step": 4226 }, { "epoch": 0.86, "learning_rate": 5.200974755568849e-05, "loss": 1.8398, "step": 4227 }, { "epoch": 0.86, "learning_rate": 5.186387653397434e-05, "loss": 1.8896, "step": 4228 }, { "epoch": 0.86, "learning_rate": 5.1718199170340094e-05, "loss": 1.8232, "step": 4229 }, { "epoch": 0.86, "learning_rate": 5.157271552773918e-05, "loss": 1.9385, "step": 4230 }, { "epoch": 0.86, "learning_rate": 5.1427425669040795e-05, "loss": 1.9404, "step": 4231 }, { "epoch": 0.86, "learning_rate": 5.128232965703095e-05, "loss": 1.8652, "step": 4232 }, { "epoch": 0.86, "learning_rate": 5.1137427554411365e-05, "loss": 1.75, "step": 4233 }, { "epoch": 0.86, "learning_rate": 5.099271942380024e-05, "loss": 1.8789, "step": 4234 }, { "epoch": 0.86, "learning_rate": 5.084820532773199e-05, "loss": 1.7793, "step": 4235 }, { "epoch": 0.86, "learning_rate": 5.070388532865716e-05, "loss": 1.8652, "step": 4236 }, { "epoch": 0.86, "learning_rate": 5.055975948894226e-05, "loss": 1.8604, "step": 4237 }, { "epoch": 0.86, "learning_rate": 5.041582787087007e-05, "loss": 1.8389, "step": 4238 }, { "epoch": 0.86, "learning_rate": 5.027209053663939e-05, "loss": 1.9043, "step": 4239 }, { "epoch": 0.86, "learning_rate": 5.0128547548364876e-05, "loss": 1.6826, "step": 4240 }, { "epoch": 0.86, "learning_rate": 4.9985198968077626e-05, "loss": 1.8555, "step": 4241 }, { "epoch": 0.86, "learning_rate": 4.98420448577242e-05, "loss": 1.9688, "step": 4242 }, { "epoch": 0.86, "learning_rate": 4.9699085279167665e-05, "loss": 1.8398, "step": 4243 }, { "epoch": 0.86, "learning_rate": 4.955632029418644e-05, "loss": 1.9648, "step": 4244 }, { "epoch": 0.86, "learning_rate": 4.941374996447545e-05, "loss": 1.916, "step": 4245 }, { "epoch": 0.86, "learning_rate": 4.927137435164503e-05, "loss": 1.834, "step": 4246 }, { "epoch": 0.86, "learning_rate": 4.9129193517221504e-05, "loss": 1.79, "step": 4247 }, { "epoch": 0.86, "learning_rate": 4.898720752264729e-05, "loss": 1.8369, "step": 4248 }, { "epoch": 0.86, "learning_rate": 4.884541642928009e-05, "loss": 1.9551, "step": 4249 }, { "epoch": 0.86, "learning_rate": 4.870382029839387e-05, "loss": 1.8916, "step": 4250 }, { "epoch": 0.86, "learning_rate": 4.856241919117821e-05, "loss": 1.9033, "step": 4251 }, { "epoch": 0.86, "learning_rate": 4.842121316873821e-05, "loss": 1.7773, "step": 4252 }, { "epoch": 0.86, "learning_rate": 4.8280202292094844e-05, "loss": 1.9121, "step": 4253 }, { "epoch": 0.86, "learning_rate": 4.8139386622184755e-05, "loss": 1.8994, "step": 4254 }, { "epoch": 0.86, "learning_rate": 4.7998766219860214e-05, "loss": 1.9111, "step": 4255 }, { "epoch": 0.86, "learning_rate": 4.785834114588894e-05, "loss": 1.8525, "step": 4256 }, { "epoch": 0.86, "learning_rate": 4.7718111460954506e-05, "loss": 1.9678, "step": 4257 }, { "epoch": 0.86, "learning_rate": 4.757807722565605e-05, "loss": 1.8701, "step": 4258 }, { "epoch": 0.86, "learning_rate": 4.7438238500507994e-05, "loss": 1.8281, "step": 4259 }, { "epoch": 0.86, "learning_rate": 4.729859534594033e-05, "loss": 1.7881, "step": 4260 }, { "epoch": 0.86, "learning_rate": 4.7159147822298795e-05, "loss": 1.8916, "step": 4261 }, { "epoch": 0.87, "learning_rate": 4.7019895989844355e-05, "loss": 1.8682, "step": 4262 }, { "epoch": 0.87, "learning_rate": 4.688083990875336e-05, "loss": 1.8467, "step": 4263 }, { "epoch": 0.87, "learning_rate": 4.674197963911775e-05, "loss": 1.8574, "step": 4264 }, { "epoch": 0.87, "learning_rate": 4.660331524094491e-05, "loss": 1.8398, "step": 4265 }, { "epoch": 0.87, "learning_rate": 4.6464846774157135e-05, "loss": 1.7881, "step": 4266 }, { "epoch": 0.87, "learning_rate": 4.632657429859266e-05, "loss": 1.7881, "step": 4267 }, { "epoch": 0.87, "learning_rate": 4.6188497874004535e-05, "loss": 1.8789, "step": 4268 }, { "epoch": 0.87, "learning_rate": 4.6050617560061236e-05, "loss": 1.8135, "step": 4269 }, { "epoch": 0.87, "learning_rate": 4.591293341634661e-05, "loss": 1.8193, "step": 4270 }, { "epoch": 0.87, "learning_rate": 4.5775445502359736e-05, "loss": 1.8066, "step": 4271 }, { "epoch": 0.87, "learning_rate": 4.563815387751463e-05, "loss": 1.8594, "step": 4272 }, { "epoch": 0.87, "learning_rate": 4.5501058601140843e-05, "loss": 1.8125, "step": 4273 }, { "epoch": 0.87, "learning_rate": 4.5364159732482756e-05, "loss": 1.8896, "step": 4274 }, { "epoch": 0.87, "learning_rate": 4.522745733070005e-05, "loss": 1.8701, "step": 4275 }, { "epoch": 0.87, "learning_rate": 4.509095145486736e-05, "loss": 1.9014, "step": 4276 }, { "epoch": 0.87, "learning_rate": 4.49546421639746e-05, "loss": 1.8223, "step": 4277 }, { "epoch": 0.87, "learning_rate": 4.481852951692672e-05, "loss": 1.8652, "step": 4278 }, { "epoch": 0.87, "learning_rate": 4.468261357254338e-05, "loss": 1.8438, "step": 4279 }, { "epoch": 0.87, "learning_rate": 4.4546894389559725e-05, "loss": 1.9209, "step": 4280 }, { "epoch": 0.87, "learning_rate": 4.441137202662543e-05, "loss": 1.8438, "step": 4281 }, { "epoch": 0.87, "learning_rate": 4.427604654230527e-05, "loss": 1.7793, "step": 4282 }, { "epoch": 0.87, "learning_rate": 4.4140917995079076e-05, "loss": 1.7803, "step": 4283 }, { "epoch": 0.87, "learning_rate": 4.400598644334136e-05, "loss": 1.6885, "step": 4284 }, { "epoch": 0.87, "learning_rate": 4.3871251945401594e-05, "loss": 1.8213, "step": 4285 }, { "epoch": 0.87, "learning_rate": 4.3736714559484245e-05, "loss": 1.8008, "step": 4286 }, { "epoch": 0.87, "learning_rate": 4.360237434372838e-05, "loss": 1.8975, "step": 4287 }, { "epoch": 0.87, "learning_rate": 4.346823135618788e-05, "loss": 1.8008, "step": 4288 }, { "epoch": 0.87, "learning_rate": 4.333428565483155e-05, "loss": 1.8369, "step": 4289 }, { "epoch": 0.87, "learning_rate": 4.320053729754281e-05, "loss": 1.8145, "step": 4290 }, { "epoch": 0.87, "learning_rate": 4.306698634211975e-05, "loss": 1.8008, "step": 4291 }, { "epoch": 0.87, "learning_rate": 4.2933632846275326e-05, "loss": 1.8594, "step": 4292 }, { "epoch": 0.87, "learning_rate": 4.280047686763716e-05, "loss": 1.7852, "step": 4293 }, { "epoch": 0.87, "learning_rate": 4.2667518463747334e-05, "loss": 1.8975, "step": 4294 }, { "epoch": 0.87, "learning_rate": 4.253475769206255e-05, "loss": 1.8164, "step": 4295 }, { "epoch": 0.87, "learning_rate": 4.2402194609954425e-05, "loss": 1.7861, "step": 4296 }, { "epoch": 0.87, "learning_rate": 4.2269829274708735e-05, "loss": 1.9775, "step": 4297 }, { "epoch": 0.87, "learning_rate": 4.2137661743526065e-05, "loss": 1.9199, "step": 4298 }, { "epoch": 0.87, "learning_rate": 4.2005692073521614e-05, "loss": 1.874, "step": 4299 }, { "epoch": 0.87, "learning_rate": 4.187392032172471e-05, "loss": 1.791, "step": 4300 }, { "epoch": 0.87, "learning_rate": 4.17423465450793e-05, "loss": 1.8496, "step": 4301 }, { "epoch": 0.87, "learning_rate": 4.161097080044407e-05, "loss": 1.8232, "step": 4302 }, { "epoch": 0.87, "learning_rate": 4.147979314459177e-05, "loss": 1.8643, "step": 4303 }, { "epoch": 0.87, "learning_rate": 4.134881363420956e-05, "loss": 1.834, "step": 4304 }, { "epoch": 0.87, "learning_rate": 4.121803232589916e-05, "loss": 1.7539, "step": 4305 }, { "epoch": 0.87, "learning_rate": 4.1087449276176695e-05, "loss": 1.9111, "step": 4306 }, { "epoch": 0.87, "learning_rate": 4.09570645414723e-05, "loss": 1.8633, "step": 4307 }, { "epoch": 0.87, "learning_rate": 4.0826878178130744e-05, "loss": 1.7334, "step": 4308 }, { "epoch": 0.87, "learning_rate": 4.06968902424108e-05, "loss": 1.8896, "step": 4309 }, { "epoch": 0.87, "learning_rate": 4.05671007904857e-05, "loss": 1.7666, "step": 4310 }, { "epoch": 0.87, "learning_rate": 4.0437509878442635e-05, "loss": 1.9619, "step": 4311 }, { "epoch": 0.88, "learning_rate": 4.030811756228331e-05, "loss": 1.7979, "step": 4312 }, { "epoch": 0.88, "learning_rate": 4.0178923897923546e-05, "loss": 1.9072, "step": 4313 }, { "epoch": 0.88, "learning_rate": 4.004992894119303e-05, "loss": 1.7285, "step": 4314 }, { "epoch": 0.88, "learning_rate": 3.99211327478361e-05, "loss": 1.8438, "step": 4315 }, { "epoch": 0.88, "learning_rate": 3.979253537351068e-05, "loss": 1.8213, "step": 4316 }, { "epoch": 0.88, "learning_rate": 3.9664136873788946e-05, "loss": 1.8291, "step": 4317 }, { "epoch": 0.88, "learning_rate": 3.9535937304157446e-05, "loss": 1.8154, "step": 4318 }, { "epoch": 0.88, "learning_rate": 3.940793672001613e-05, "loss": 1.8418, "step": 4319 }, { "epoch": 0.88, "learning_rate": 3.928013517667961e-05, "loss": 1.9004, "step": 4320 }, { "epoch": 0.88, "learning_rate": 3.9152532729376136e-05, "loss": 1.9434, "step": 4321 }, { "epoch": 0.88, "learning_rate": 3.902512943324793e-05, "loss": 1.7422, "step": 4322 }, { "epoch": 0.88, "learning_rate": 3.889792534335118e-05, "loss": 1.9082, "step": 4323 }, { "epoch": 0.88, "learning_rate": 3.877092051465614e-05, "loss": 1.8076, "step": 4324 }, { "epoch": 0.88, "learning_rate": 3.864411500204679e-05, "loss": 1.7725, "step": 4325 }, { "epoch": 0.88, "learning_rate": 3.851750886032085e-05, "loss": 1.7285, "step": 4326 }, { "epoch": 0.88, "learning_rate": 3.839110214419017e-05, "loss": 1.9307, "step": 4327 }, { "epoch": 0.88, "learning_rate": 3.826489490828039e-05, "loss": 1.9131, "step": 4328 }, { "epoch": 0.88, "learning_rate": 3.8138887207130767e-05, "loss": 1.9199, "step": 4329 }, { "epoch": 0.88, "learning_rate": 3.8013079095194306e-05, "loss": 1.9072, "step": 4330 }, { "epoch": 0.88, "learning_rate": 3.788747062683812e-05, "loss": 1.7725, "step": 4331 }, { "epoch": 0.88, "learning_rate": 3.7762061856342567e-05, "loss": 1.7334, "step": 4332 }, { "epoch": 0.88, "learning_rate": 3.763685283790208e-05, "loss": 1.8486, "step": 4333 }, { "epoch": 0.88, "learning_rate": 3.7511843625624654e-05, "loss": 1.877, "step": 4334 }, { "epoch": 0.88, "learning_rate": 3.738703427353191e-05, "loss": 1.8506, "step": 4335 }, { "epoch": 0.88, "learning_rate": 3.7262424835558913e-05, "loss": 1.8418, "step": 4336 }, { "epoch": 0.88, "learning_rate": 3.713801536555483e-05, "loss": 1.7939, "step": 4337 }, { "epoch": 0.88, "learning_rate": 3.7013805917281985e-05, "loss": 1.8633, "step": 4338 }, { "epoch": 0.88, "learning_rate": 3.688979654441627e-05, "loss": 1.7783, "step": 4339 }, { "epoch": 0.88, "learning_rate": 3.6765987300547366e-05, "loss": 1.7686, "step": 4340 }, { "epoch": 0.88, "learning_rate": 3.664237823917843e-05, "loss": 1.8564, "step": 4341 }, { "epoch": 0.88, "learning_rate": 3.65189694137259e-05, "loss": 1.7852, "step": 4342 }, { "epoch": 0.88, "learning_rate": 3.639576087751989e-05, "loss": 1.8623, "step": 4343 }, { "epoch": 0.88, "learning_rate": 3.627275268380392e-05, "loss": 1.8018, "step": 4344 }, { "epoch": 0.88, "learning_rate": 3.6149944885734755e-05, "loss": 1.9365, "step": 4345 }, { "epoch": 0.88, "learning_rate": 3.602733753638277e-05, "loss": 1.877, "step": 4346 }, { "epoch": 0.88, "learning_rate": 3.590493068873163e-05, "loss": 1.8818, "step": 4347 }, { "epoch": 0.88, "learning_rate": 3.5782724395678476e-05, "loss": 1.8662, "step": 4348 }, { "epoch": 0.88, "learning_rate": 3.566071871003351e-05, "loss": 1.7959, "step": 4349 }, { "epoch": 0.88, "learning_rate": 3.553891368452061e-05, "loss": 1.9053, "step": 4350 }, { "epoch": 0.88, "learning_rate": 3.5417309371776674e-05, "loss": 1.8691, "step": 4351 }, { "epoch": 0.88, "learning_rate": 3.5295905824351826e-05, "loss": 1.7979, "step": 4352 }, { "epoch": 0.88, "learning_rate": 3.517470309470972e-05, "loss": 1.835, "step": 4353 }, { "epoch": 0.88, "learning_rate": 3.5053701235226886e-05, "loss": 1.8242, "step": 4354 }, { "epoch": 0.88, "learning_rate": 3.493290029819324e-05, "loss": 1.793, "step": 4355 }, { "epoch": 0.88, "learning_rate": 3.481230033581201e-05, "loss": 1.7334, "step": 4356 }, { "epoch": 0.88, "learning_rate": 3.469190140019923e-05, "loss": 1.8691, "step": 4357 }, { "epoch": 0.88, "learning_rate": 3.4571703543384295e-05, "loss": 1.8867, "step": 4358 }, { "epoch": 0.88, "learning_rate": 3.445170681730964e-05, "loss": 1.9443, "step": 4359 }, { "epoch": 0.88, "learning_rate": 3.433191127383078e-05, "loss": 1.8799, "step": 4360 }, { "epoch": 0.89, "learning_rate": 3.4212316964716386e-05, "loss": 1.8369, "step": 4361 }, { "epoch": 0.89, "learning_rate": 3.4092923941647926e-05, "loss": 1.8896, "step": 4362 }, { "epoch": 0.89, "learning_rate": 3.397373225622025e-05, "loss": 1.7812, "step": 4363 }, { "epoch": 0.89, "learning_rate": 3.3854741959940926e-05, "loss": 1.8457, "step": 4364 }, { "epoch": 0.89, "learning_rate": 3.373595310423039e-05, "loss": 1.8965, "step": 4365 }, { "epoch": 0.89, "learning_rate": 3.361736574042246e-05, "loss": 1.7334, "step": 4366 }, { "epoch": 0.89, "learning_rate": 3.349897991976336e-05, "loss": 1.7959, "step": 4367 }, { "epoch": 0.89, "learning_rate": 3.338079569341268e-05, "loss": 1.8535, "step": 4368 }, { "epoch": 0.89, "learning_rate": 3.3262813112442667e-05, "loss": 1.8311, "step": 4369 }, { "epoch": 0.89, "learning_rate": 3.3145032227838435e-05, "loss": 1.957, "step": 4370 }, { "epoch": 0.89, "learning_rate": 3.3027453090497874e-05, "loss": 1.8213, "step": 4371 }, { "epoch": 0.89, "learning_rate": 3.2910075751231895e-05, "loss": 1.8389, "step": 4372 }, { "epoch": 0.89, "learning_rate": 3.2792900260764035e-05, "loss": 1.7861, "step": 4373 }, { "epoch": 0.89, "learning_rate": 3.267592666973057e-05, "loss": 1.8105, "step": 4374 }, { "epoch": 0.89, "learning_rate": 3.255915502868062e-05, "loss": 1.9258, "step": 4375 }, { "epoch": 0.89, "learning_rate": 3.244258538807621e-05, "loss": 2.0205, "step": 4376 }, { "epoch": 0.89, "learning_rate": 3.232621779829176e-05, "loss": 1.8838, "step": 4377 }, { "epoch": 0.89, "learning_rate": 3.2210052309614436e-05, "loss": 1.8936, "step": 4378 }, { "epoch": 0.89, "learning_rate": 3.209408897224419e-05, "loss": 1.8262, "step": 4379 }, { "epoch": 0.89, "learning_rate": 3.197832783629362e-05, "loss": 1.9365, "step": 4380 }, { "epoch": 0.89, "learning_rate": 3.186276895178774e-05, "loss": 1.7373, "step": 4381 }, { "epoch": 0.89, "learning_rate": 3.1747412368664354e-05, "loss": 1.8447, "step": 4382 }, { "epoch": 0.89, "learning_rate": 3.16322581367739e-05, "loss": 1.8857, "step": 4383 }, { "epoch": 0.89, "learning_rate": 3.151730630587912e-05, "loss": 1.915, "step": 4384 }, { "epoch": 0.89, "learning_rate": 3.140255692565558e-05, "loss": 1.8018, "step": 4385 }, { "epoch": 0.89, "learning_rate": 3.1288010045691095e-05, "loss": 1.7686, "step": 4386 }, { "epoch": 0.89, "learning_rate": 3.1173665715486075e-05, "loss": 1.7578, "step": 4387 }, { "epoch": 0.89, "learning_rate": 3.10595239844535e-05, "loss": 1.8242, "step": 4388 }, { "epoch": 0.89, "learning_rate": 3.094558490191873e-05, "loss": 1.748, "step": 4389 }, { "epoch": 0.89, "learning_rate": 3.083184851711945e-05, "loss": 1.9355, "step": 4390 }, { "epoch": 0.89, "learning_rate": 3.0718314879205954e-05, "loss": 1.8174, "step": 4391 }, { "epoch": 0.89, "learning_rate": 3.060498403724071e-05, "loss": 1.8389, "step": 4392 }, { "epoch": 0.89, "learning_rate": 3.0491856040198606e-05, "loss": 1.9336, "step": 4393 }, { "epoch": 0.89, "learning_rate": 3.037893093696703e-05, "loss": 1.8145, "step": 4394 }, { "epoch": 0.89, "learning_rate": 3.0266208776345395e-05, "loss": 1.8555, "step": 4395 }, { "epoch": 0.89, "learning_rate": 3.0153689607045842e-05, "loss": 1.8008, "step": 4396 }, { "epoch": 0.89, "learning_rate": 3.0041373477692314e-05, "loss": 1.9883, "step": 4397 }, { "epoch": 0.89, "learning_rate": 2.992926043682137e-05, "loss": 1.8281, "step": 4398 }, { "epoch": 0.89, "learning_rate": 2.9817350532881648e-05, "loss": 1.833, "step": 4399 }, { "epoch": 0.89, "learning_rate": 2.9705643814233907e-05, "loss": 1.8506, "step": 4400 }, { "epoch": 0.89, "learning_rate": 2.959414032915142e-05, "loss": 1.8936, "step": 4401 }, { "epoch": 0.89, "learning_rate": 2.9482840125819254e-05, "loss": 1.9287, "step": 4402 }, { "epoch": 0.89, "learning_rate": 2.9371743252334936e-05, "loss": 1.8916, "step": 4403 }, { "epoch": 0.89, "learning_rate": 2.9260849756708007e-05, "loss": 1.8564, "step": 4404 }, { "epoch": 0.89, "learning_rate": 2.9150159686860022e-05, "loss": 1.8428, "step": 4405 }, { "epoch": 0.89, "learning_rate": 2.9039673090624775e-05, "loss": 1.8721, "step": 4406 }, { "epoch": 0.89, "learning_rate": 2.8929390015748125e-05, "loss": 1.9053, "step": 4407 }, { "epoch": 0.89, "learning_rate": 2.881931050988784e-05, "loss": 1.9111, "step": 4408 }, { "epoch": 0.89, "learning_rate": 2.8709434620613762e-05, "loss": 1.9131, "step": 4409 }, { "epoch": 0.9, "learning_rate": 2.85997623954079e-05, "loss": 1.958, "step": 4410 }, { "epoch": 0.9, "learning_rate": 2.8490293881664186e-05, "loss": 1.832, "step": 4411 }, { "epoch": 0.9, "learning_rate": 2.8381029126688384e-05, "loss": 1.7461, "step": 4412 }, { "epoch": 0.9, "learning_rate": 2.8271968177698282e-05, "loss": 1.875, "step": 4413 }, { "epoch": 0.9, "learning_rate": 2.816311108182368e-05, "loss": 1.8564, "step": 4414 }, { "epoch": 0.9, "learning_rate": 2.8054457886106167e-05, "loss": 1.9717, "step": 4415 }, { "epoch": 0.9, "learning_rate": 2.7946008637499244e-05, "loss": 1.8613, "step": 4416 }, { "epoch": 0.9, "learning_rate": 2.7837763382868252e-05, "loss": 1.8057, "step": 4417 }, { "epoch": 0.9, "learning_rate": 2.772972216899061e-05, "loss": 1.8721, "step": 4418 }, { "epoch": 0.9, "learning_rate": 2.7621885042555196e-05, "loss": 1.917, "step": 4419 }, { "epoch": 0.9, "learning_rate": 2.7514252050163003e-05, "loss": 1.877, "step": 4420 }, { "epoch": 0.9, "learning_rate": 2.740682323832666e-05, "loss": 1.707, "step": 4421 }, { "epoch": 0.9, "learning_rate": 2.7299598653470425e-05, "loss": 1.8652, "step": 4422 }, { "epoch": 0.9, "learning_rate": 2.7192578341930617e-05, "loss": 1.7959, "step": 4423 }, { "epoch": 0.9, "learning_rate": 2.7085762349955135e-05, "loss": 1.7783, "step": 4424 }, { "epoch": 0.9, "learning_rate": 2.697915072370344e-05, "loss": 1.875, "step": 4425 }, { "epoch": 0.9, "learning_rate": 2.6872743509246954e-05, "loss": 1.7324, "step": 4426 }, { "epoch": 0.9, "learning_rate": 2.6766540752568514e-05, "loss": 1.7871, "step": 4427 }, { "epoch": 0.9, "learning_rate": 2.6660542499562788e-05, "loss": 1.8916, "step": 4428 }, { "epoch": 0.9, "learning_rate": 2.6554748796035865e-05, "loss": 1.917, "step": 4429 }, { "epoch": 0.9, "learning_rate": 2.6449159687705615e-05, "loss": 1.8223, "step": 4430 }, { "epoch": 0.9, "learning_rate": 2.6343775220201537e-05, "loss": 1.7842, "step": 4431 }, { "epoch": 0.9, "learning_rate": 2.6238595439064485e-05, "loss": 1.8721, "step": 4432 }, { "epoch": 0.9, "learning_rate": 2.6133620389747094e-05, "loss": 1.8301, "step": 4433 }, { "epoch": 0.9, "learning_rate": 2.60288501176133e-05, "loss": 1.8223, "step": 4434 }, { "epoch": 0.9, "learning_rate": 2.5924284667938613e-05, "loss": 1.792, "step": 4435 }, { "epoch": 0.9, "learning_rate": 2.5819924085910217e-05, "loss": 1.915, "step": 4436 }, { "epoch": 0.9, "learning_rate": 2.5715768416626485e-05, "loss": 1.8711, "step": 4437 }, { "epoch": 0.9, "learning_rate": 2.5611817705097406e-05, "loss": 1.8066, "step": 4438 }, { "epoch": 0.9, "learning_rate": 2.5508071996244497e-05, "loss": 1.9229, "step": 4439 }, { "epoch": 0.9, "learning_rate": 2.5404531334900448e-05, "loss": 1.9453, "step": 4440 }, { "epoch": 0.9, "learning_rate": 2.530119576580936e-05, "loss": 1.8438, "step": 4441 }, { "epoch": 0.9, "learning_rate": 2.519806533362695e-05, "loss": 1.8486, "step": 4442 }, { "epoch": 0.9, "learning_rate": 2.5095140082920022e-05, "loss": 1.7246, "step": 4443 }, { "epoch": 0.9, "learning_rate": 2.4992420058166765e-05, "loss": 1.9385, "step": 4444 }, { "epoch": 0.9, "learning_rate": 2.4889905303756844e-05, "loss": 1.8184, "step": 4445 }, { "epoch": 0.9, "learning_rate": 2.4787595863991153e-05, "loss": 1.8809, "step": 4446 }, { "epoch": 0.9, "learning_rate": 2.4685491783081714e-05, "loss": 1.8555, "step": 4447 }, { "epoch": 0.9, "learning_rate": 2.4583593105151846e-05, "loss": 1.8867, "step": 4448 }, { "epoch": 0.9, "learning_rate": 2.4481899874236323e-05, "loss": 1.9111, "step": 4449 }, { "epoch": 0.9, "learning_rate": 2.4380412134280883e-05, "loss": 1.8018, "step": 4450 }, { "epoch": 0.9, "learning_rate": 2.427912992914255e-05, "loss": 1.8525, "step": 4451 }, { "epoch": 0.9, "learning_rate": 2.4178053302589598e-05, "loss": 1.8457, "step": 4452 }, { "epoch": 0.9, "learning_rate": 2.4077182298301414e-05, "loss": 1.96, "step": 4453 }, { "epoch": 0.9, "learning_rate": 2.3976516959868412e-05, "loss": 1.8545, "step": 4454 }, { "epoch": 0.9, "learning_rate": 2.3876057330792345e-05, "loss": 1.8018, "step": 4455 }, { "epoch": 0.9, "learning_rate": 2.3775803454485935e-05, "loss": 1.7871, "step": 4456 }, { "epoch": 0.9, "learning_rate": 2.3675755374272856e-05, "loss": 1.8818, "step": 4457 }, { "epoch": 0.9, "learning_rate": 2.3575913133388195e-05, "loss": 1.8779, "step": 4458 }, { "epoch": 0.91, "learning_rate": 2.347627677497788e-05, "loss": 1.8604, "step": 4459 }, { "epoch": 0.91, "learning_rate": 2.3376846342098744e-05, "loss": 1.8135, "step": 4460 }, { "epoch": 0.91, "learning_rate": 2.3277621877718924e-05, "loss": 1.9072, "step": 4461 }, { "epoch": 0.91, "learning_rate": 2.3178603424717394e-05, "loss": 1.8057, "step": 4462 }, { "epoch": 0.91, "learning_rate": 2.3079791025884033e-05, "loss": 1.876, "step": 4463 }, { "epoch": 0.91, "learning_rate": 2.298118472391969e-05, "loss": 1.9736, "step": 4464 }, { "epoch": 0.91, "learning_rate": 2.2882784561436277e-05, "loss": 1.7578, "step": 4465 }, { "epoch": 0.91, "learning_rate": 2.2784590580956664e-05, "loss": 1.8447, "step": 4466 }, { "epoch": 0.91, "learning_rate": 2.268660282491436e-05, "loss": 1.8311, "step": 4467 }, { "epoch": 0.91, "learning_rate": 2.258882133565404e-05, "loss": 1.8965, "step": 4468 }, { "epoch": 0.91, "learning_rate": 2.2491246155431076e-05, "loss": 1.8916, "step": 4469 }, { "epoch": 0.91, "learning_rate": 2.2393877326411573e-05, "loss": 1.9033, "step": 4470 }, { "epoch": 0.91, "learning_rate": 2.2296714890672808e-05, "loss": 1.8906, "step": 4471 }, { "epoch": 0.91, "learning_rate": 2.2199758890202594e-05, "loss": 1.8799, "step": 4472 }, { "epoch": 0.91, "learning_rate": 2.2103009366899575e-05, "loss": 1.9609, "step": 4473 }, { "epoch": 0.91, "learning_rate": 2.2006466362573307e-05, "loss": 1.8184, "step": 4474 }, { "epoch": 0.91, "learning_rate": 2.191012991894392e-05, "loss": 1.9629, "step": 4475 }, { "epoch": 0.91, "learning_rate": 2.1814000077642338e-05, "loss": 1.8096, "step": 4476 }, { "epoch": 0.91, "learning_rate": 2.1718076880210326e-05, "loss": 1.8379, "step": 4477 }, { "epoch": 0.91, "learning_rate": 2.1622360368100226e-05, "loss": 1.8779, "step": 4478 }, { "epoch": 0.91, "learning_rate": 2.152685058267495e-05, "loss": 1.7227, "step": 4479 }, { "epoch": 0.91, "learning_rate": 2.143154756520832e-05, "loss": 1.8027, "step": 4480 }, { "epoch": 0.91, "learning_rate": 2.133645135688478e-05, "loss": 1.8008, "step": 4481 }, { "epoch": 0.91, "learning_rate": 2.1241561998799174e-05, "loss": 1.7686, "step": 4482 }, { "epoch": 0.91, "learning_rate": 2.1146879531957153e-05, "loss": 1.8877, "step": 4483 }, { "epoch": 0.91, "learning_rate": 2.105240399727493e-05, "loss": 1.7969, "step": 4484 }, { "epoch": 0.91, "learning_rate": 2.095813543557923e-05, "loss": 1.7812, "step": 4485 }, { "epoch": 0.91, "learning_rate": 2.0864073887607416e-05, "loss": 1.8193, "step": 4486 }, { "epoch": 0.91, "learning_rate": 2.0770219394007362e-05, "loss": 1.8115, "step": 4487 }, { "epoch": 0.91, "learning_rate": 2.0676571995337512e-05, "loss": 1.8789, "step": 4488 }, { "epoch": 0.91, "learning_rate": 2.0583131732066606e-05, "loss": 1.9131, "step": 4489 }, { "epoch": 0.91, "learning_rate": 2.048989864457418e-05, "loss": 1.7998, "step": 4490 }, { "epoch": 0.91, "learning_rate": 2.039687277315011e-05, "loss": 1.8867, "step": 4491 }, { "epoch": 0.91, "learning_rate": 2.0304054157994523e-05, "loss": 1.8105, "step": 4492 }, { "epoch": 0.91, "learning_rate": 2.0211442839218275e-05, "loss": 1.7578, "step": 4493 }, { "epoch": 0.91, "learning_rate": 2.011903885684263e-05, "loss": 1.9268, "step": 4494 }, { "epoch": 0.91, "learning_rate": 2.0026842250799037e-05, "loss": 1.7891, "step": 4495 }, { "epoch": 0.91, "learning_rate": 1.9934853060929458e-05, "loss": 1.9775, "step": 4496 }, { "epoch": 0.91, "learning_rate": 1.984307132698626e-05, "loss": 1.8115, "step": 4497 }, { "epoch": 0.91, "learning_rate": 1.9751497088632054e-05, "loss": 1.915, "step": 4498 }, { "epoch": 0.91, "learning_rate": 1.9660130385439846e-05, "loss": 1.9023, "step": 4499 }, { "epoch": 0.91, "learning_rate": 1.9568971256892942e-05, "loss": 1.7969, "step": 4500 }, { "epoch": 0.91, "learning_rate": 1.947801974238511e-05, "loss": 1.8193, "step": 4501 }, { "epoch": 0.91, "learning_rate": 1.9387275881220022e-05, "loss": 1.873, "step": 4502 }, { "epoch": 0.91, "learning_rate": 1.9296739712611977e-05, "loss": 1.835, "step": 4503 }, { "epoch": 0.91, "learning_rate": 1.92064112756854e-05, "loss": 1.8867, "step": 4504 }, { "epoch": 0.91, "learning_rate": 1.9116290609474786e-05, "loss": 1.8467, "step": 4505 }, { "epoch": 0.91, "learning_rate": 1.9026377752925095e-05, "loss": 1.8535, "step": 4506 }, { "epoch": 0.91, "learning_rate": 1.893667274489136e-05, "loss": 1.9307, "step": 4507 }, { "epoch": 0.91, "learning_rate": 1.884717562413879e-05, "loss": 1.8926, "step": 4508 }, { "epoch": 0.92, "learning_rate": 1.8757886429342895e-05, "loss": 1.8574, "step": 4509 }, { "epoch": 0.92, "learning_rate": 1.866880519908909e-05, "loss": 1.9375, "step": 4510 }, { "epoch": 0.92, "learning_rate": 1.8579931971872966e-05, "loss": 1.8545, "step": 4511 }, { "epoch": 0.92, "learning_rate": 1.8491266786100525e-05, "loss": 1.7656, "step": 4512 }, { "epoch": 0.92, "learning_rate": 1.840280968008745e-05, "loss": 1.7705, "step": 4513 }, { "epoch": 0.92, "learning_rate": 1.8314560692059833e-05, "loss": 1.9014, "step": 4514 }, { "epoch": 0.92, "learning_rate": 1.822651986015361e-05, "loss": 1.8516, "step": 4515 }, { "epoch": 0.92, "learning_rate": 1.8138687222414962e-05, "loss": 1.8047, "step": 4516 }, { "epoch": 0.92, "learning_rate": 1.8051062816799912e-05, "loss": 1.8193, "step": 4517 }, { "epoch": 0.92, "learning_rate": 1.796364668117445e-05, "loss": 1.875, "step": 4518 }, { "epoch": 0.92, "learning_rate": 1.7876438853314968e-05, "loss": 1.8086, "step": 4519 }, { "epoch": 0.92, "learning_rate": 1.7789439370907325e-05, "loss": 1.8408, "step": 4520 }, { "epoch": 0.92, "learning_rate": 1.770264827154766e-05, "loss": 1.7646, "step": 4521 }, { "epoch": 0.92, "learning_rate": 1.7616065592742035e-05, "loss": 1.793, "step": 4522 }, { "epoch": 0.92, "learning_rate": 1.7529691371906354e-05, "loss": 1.7539, "step": 4523 }, { "epoch": 0.92, "learning_rate": 1.744352564636642e-05, "loss": 1.915, "step": 4524 }, { "epoch": 0.92, "learning_rate": 1.735756845335812e-05, "loss": 1.7832, "step": 4525 }, { "epoch": 0.92, "learning_rate": 1.7271819830026957e-05, "loss": 1.8086, "step": 4526 }, { "epoch": 0.92, "learning_rate": 1.718627981342852e-05, "loss": 1.8604, "step": 4527 }, { "epoch": 0.92, "learning_rate": 1.710094844052812e-05, "loss": 1.9727, "step": 4528 }, { "epoch": 0.92, "learning_rate": 1.701582574820104e-05, "loss": 1.8564, "step": 4529 }, { "epoch": 0.92, "learning_rate": 1.6930911773232306e-05, "loss": 1.9082, "step": 4530 }, { "epoch": 0.92, "learning_rate": 1.6846206552316613e-05, "loss": 1.915, "step": 4531 }, { "epoch": 0.92, "learning_rate": 1.676171012205874e-05, "loss": 1.8174, "step": 4532 }, { "epoch": 0.92, "learning_rate": 1.667742251897303e-05, "loss": 1.8564, "step": 4533 }, { "epoch": 0.92, "learning_rate": 1.6593343779483517e-05, "loss": 1.8408, "step": 4534 }, { "epoch": 0.92, "learning_rate": 1.6509473939924135e-05, "loss": 1.8135, "step": 4535 }, { "epoch": 0.92, "learning_rate": 1.6425813036538616e-05, "loss": 1.8857, "step": 4536 }, { "epoch": 0.92, "learning_rate": 1.6342361105480096e-05, "loss": 1.7676, "step": 4537 }, { "epoch": 0.92, "learning_rate": 1.6259118182811785e-05, "loss": 1.8164, "step": 4538 }, { "epoch": 0.92, "learning_rate": 1.617608430450629e-05, "loss": 1.8525, "step": 4539 }, { "epoch": 0.92, "learning_rate": 1.609325950644591e-05, "loss": 1.8848, "step": 4540 }, { "epoch": 0.92, "learning_rate": 1.60106438244228e-05, "loss": 1.7812, "step": 4541 }, { "epoch": 0.92, "learning_rate": 1.5928237294138394e-05, "loss": 1.8945, "step": 4542 }, { "epoch": 0.92, "learning_rate": 1.5846039951204095e-05, "loss": 1.7861, "step": 4543 }, { "epoch": 0.92, "learning_rate": 1.576405183114077e-05, "loss": 1.8105, "step": 4544 }, { "epoch": 0.92, "learning_rate": 1.568227296937885e-05, "loss": 1.8818, "step": 4545 }, { "epoch": 0.92, "learning_rate": 1.5600703401258297e-05, "loss": 1.8486, "step": 4546 }, { "epoch": 0.92, "learning_rate": 1.551934316202869e-05, "loss": 1.9404, "step": 4547 }, { "epoch": 0.92, "learning_rate": 1.543819228684912e-05, "loss": 1.8877, "step": 4548 }, { "epoch": 0.92, "learning_rate": 1.5357250810788315e-05, "loss": 1.8535, "step": 4549 }, { "epoch": 0.92, "learning_rate": 1.52765187688243e-05, "loss": 1.9316, "step": 4550 }, { "epoch": 0.92, "learning_rate": 1.5195996195844885e-05, "loss": 1.8506, "step": 4551 }, { "epoch": 0.92, "learning_rate": 1.5115683126647072e-05, "loss": 2.0049, "step": 4552 }, { "epoch": 0.92, "learning_rate": 1.5035579595937377e-05, "loss": 1.9131, "step": 4553 }, { "epoch": 0.92, "learning_rate": 1.4955685638331995e-05, "loss": 1.8301, "step": 4554 }, { "epoch": 0.92, "learning_rate": 1.4876001288356311e-05, "loss": 1.8838, "step": 4555 }, { "epoch": 0.92, "learning_rate": 1.479652658044517e-05, "loss": 1.8906, "step": 4556 }, { "epoch": 0.92, "learning_rate": 1.471726154894304e-05, "loss": 1.8154, "step": 4557 }, { "epoch": 0.93, "learning_rate": 1.4638206228103413e-05, "loss": 1.8457, "step": 4558 }, { "epoch": 0.93, "learning_rate": 1.4559360652089404e-05, "loss": 1.8809, "step": 4559 }, { "epoch": 0.93, "learning_rate": 1.4480724854973536e-05, "loss": 1.8047, "step": 4560 }, { "epoch": 0.93, "learning_rate": 1.4402298870737517e-05, "loss": 1.8633, "step": 4561 }, { "epoch": 0.93, "learning_rate": 1.4324082733272348e-05, "loss": 1.7119, "step": 4562 }, { "epoch": 0.93, "learning_rate": 1.4246076476378489e-05, "loss": 1.9268, "step": 4563 }, { "epoch": 0.93, "learning_rate": 1.416828013376581e-05, "loss": 1.8926, "step": 4564 }, { "epoch": 0.93, "learning_rate": 1.409069373905314e-05, "loss": 1.8193, "step": 4565 }, { "epoch": 0.93, "learning_rate": 1.4013317325768826e-05, "loss": 1.8896, "step": 4566 }, { "epoch": 0.93, "learning_rate": 1.3936150927350399e-05, "loss": 1.8193, "step": 4567 }, { "epoch": 0.93, "learning_rate": 1.385919457714463e-05, "loss": 1.8926, "step": 4568 }, { "epoch": 0.93, "learning_rate": 1.378244830840747e-05, "loss": 1.9102, "step": 4569 }, { "epoch": 0.93, "learning_rate": 1.3705912154304224e-05, "loss": 1.8867, "step": 4570 }, { "epoch": 0.93, "learning_rate": 1.3629586147909323e-05, "loss": 1.8516, "step": 4571 }, { "epoch": 0.93, "learning_rate": 1.355347032220633e-05, "loss": 1.8623, "step": 4572 }, { "epoch": 0.93, "learning_rate": 1.3477564710088097e-05, "loss": 1.8652, "step": 4573 }, { "epoch": 0.93, "learning_rate": 1.34018693443565e-05, "loss": 1.8496, "step": 4574 }, { "epoch": 0.93, "learning_rate": 1.3326384257722645e-05, "loss": 1.8652, "step": 4575 }, { "epoch": 0.93, "learning_rate": 1.3251109482806666e-05, "loss": 1.9219, "step": 4576 }, { "epoch": 0.93, "learning_rate": 1.3176045052138097e-05, "loss": 1.7881, "step": 4577 }, { "epoch": 0.93, "learning_rate": 1.3101190998155155e-05, "loss": 1.7939, "step": 4578 }, { "epoch": 0.93, "learning_rate": 1.302654735320552e-05, "loss": 1.7373, "step": 4579 }, { "epoch": 0.93, "learning_rate": 1.2952114149545724e-05, "loss": 1.8525, "step": 4580 }, { "epoch": 0.93, "learning_rate": 1.2877891419341426e-05, "loss": 1.8652, "step": 4581 }, { "epoch": 0.93, "learning_rate": 1.2803879194667245e-05, "loss": 1.8242, "step": 4582 }, { "epoch": 0.93, "learning_rate": 1.2730077507506987e-05, "loss": 1.7959, "step": 4583 }, { "epoch": 0.93, "learning_rate": 1.2656486389753418e-05, "loss": 1.8418, "step": 4584 }, { "epoch": 0.93, "learning_rate": 1.2583105873208266e-05, "loss": 1.7949, "step": 4585 }, { "epoch": 0.93, "learning_rate": 1.2509935989582332e-05, "loss": 1.8467, "step": 4586 }, { "epoch": 0.93, "learning_rate": 1.2436976770495268e-05, "loss": 1.833, "step": 4587 }, { "epoch": 0.93, "learning_rate": 1.2364228247475684e-05, "loss": 1.8711, "step": 4588 }, { "epoch": 0.93, "learning_rate": 1.2291690451961435e-05, "loss": 1.8398, "step": 4589 }, { "epoch": 0.93, "learning_rate": 1.2219363415298833e-05, "loss": 1.7578, "step": 4590 }, { "epoch": 0.93, "learning_rate": 1.2147247168743547e-05, "loss": 1.7891, "step": 4591 }, { "epoch": 0.93, "learning_rate": 1.2075341743459978e-05, "loss": 1.8809, "step": 4592 }, { "epoch": 0.93, "learning_rate": 1.2003647170521381e-05, "loss": 1.8066, "step": 4593 }, { "epoch": 0.93, "learning_rate": 1.1932163480909864e-05, "loss": 1.8545, "step": 4594 }, { "epoch": 0.93, "learning_rate": 1.1860890705516547e-05, "loss": 1.8818, "step": 4595 }, { "epoch": 0.93, "learning_rate": 1.1789828875141351e-05, "loss": 1.8506, "step": 4596 }, { "epoch": 0.93, "learning_rate": 1.1718978020492987e-05, "loss": 1.957, "step": 4597 }, { "epoch": 0.93, "learning_rate": 1.1648338172188966e-05, "loss": 1.79, "step": 4598 }, { "epoch": 0.93, "learning_rate": 1.1577909360755812e-05, "loss": 1.7197, "step": 4599 }, { "epoch": 0.93, "learning_rate": 1.150769161662868e-05, "loss": 1.8135, "step": 4600 }, { "epoch": 0.93, "learning_rate": 1.1437684970151407e-05, "loss": 1.7754, "step": 4601 }, { "epoch": 0.93, "learning_rate": 1.1367889451576962e-05, "loss": 1.8789, "step": 4602 }, { "epoch": 0.93, "learning_rate": 1.1298305091066662e-05, "loss": 1.8525, "step": 4603 }, { "epoch": 0.93, "learning_rate": 1.12289319186909e-05, "loss": 1.8555, "step": 4604 }, { "epoch": 0.93, "learning_rate": 1.1159769964428745e-05, "loss": 1.8369, "step": 4605 }, { "epoch": 0.93, "learning_rate": 1.1090819258167795e-05, "loss": 1.8428, "step": 4606 }, { "epoch": 0.94, "learning_rate": 1.1022079829704435e-05, "loss": 1.8467, "step": 4607 }, { "epoch": 0.94, "learning_rate": 1.0953551708744014e-05, "loss": 1.7627, "step": 4608 }, { "epoch": 0.94, "learning_rate": 1.0885234924900234e-05, "loss": 1.835, "step": 4609 }, { "epoch": 0.94, "learning_rate": 1.0817129507695534e-05, "loss": 1.8711, "step": 4610 }, { "epoch": 0.94, "learning_rate": 1.0749235486561149e-05, "loss": 1.7959, "step": 4611 }, { "epoch": 0.94, "learning_rate": 1.0681552890836943e-05, "loss": 1.8857, "step": 4612 }, { "epoch": 0.94, "learning_rate": 1.0614081749771187e-05, "loss": 1.8281, "step": 4613 }, { "epoch": 0.94, "learning_rate": 1.0546822092521113e-05, "loss": 1.9248, "step": 4614 }, { "epoch": 0.94, "learning_rate": 1.0479773948152305e-05, "loss": 1.8369, "step": 4615 }, { "epoch": 0.94, "learning_rate": 1.0412937345639029e-05, "loss": 1.791, "step": 4616 }, { "epoch": 0.94, "learning_rate": 1.0346312313864126e-05, "loss": 1.8838, "step": 4617 }, { "epoch": 0.94, "learning_rate": 1.027989888161901e-05, "loss": 1.8887, "step": 4618 }, { "epoch": 0.94, "learning_rate": 1.0213697077603722e-05, "loss": 1.7959, "step": 4619 }, { "epoch": 0.94, "learning_rate": 1.0147706930426715e-05, "loss": 1.8887, "step": 4620 }, { "epoch": 0.94, "learning_rate": 1.0081928468605117e-05, "loss": 1.8408, "step": 4621 }, { "epoch": 0.94, "learning_rate": 1.0016361720564472e-05, "loss": 1.9463, "step": 4622 }, { "epoch": 0.94, "learning_rate": 9.951006714638833e-06, "loss": 1.8721, "step": 4623 }, { "epoch": 0.94, "learning_rate": 9.88586347907089e-06, "loss": 1.8877, "step": 4624 }, { "epoch": 0.94, "learning_rate": 9.820932042011622e-06, "loss": 1.8418, "step": 4625 }, { "epoch": 0.94, "learning_rate": 9.75621243152064e-06, "loss": 1.8154, "step": 4626 }, { "epoch": 0.94, "learning_rate": 9.691704675565904e-06, "loss": 1.8623, "step": 4627 }, { "epoch": 0.94, "learning_rate": 9.627408802024007e-06, "loss": 1.8125, "step": 4628 }, { "epoch": 0.94, "learning_rate": 9.56332483867961e-06, "loss": 1.9355, "step": 4629 }, { "epoch": 0.94, "learning_rate": 9.499452813226283e-06, "loss": 1.8809, "step": 4630 }, { "epoch": 0.94, "learning_rate": 9.43579275326556e-06, "loss": 1.7764, "step": 4631 }, { "epoch": 0.94, "learning_rate": 9.372344686307655e-06, "loss": 1.8291, "step": 4632 }, { "epoch": 0.94, "learning_rate": 9.309108639771136e-06, "loss": 1.9531, "step": 4633 }, { "epoch": 0.94, "learning_rate": 9.246084640982866e-06, "loss": 1.9414, "step": 4634 }, { "epoch": 0.94, "learning_rate": 9.183272717178059e-06, "loss": 1.8535, "step": 4635 }, { "epoch": 0.94, "learning_rate": 9.120672895500393e-06, "loss": 1.8516, "step": 4636 }, { "epoch": 0.94, "learning_rate": 9.058285203001837e-06, "loss": 1.8691, "step": 4637 }, { "epoch": 0.94, "learning_rate": 8.99610966664266e-06, "loss": 1.832, "step": 4638 }, { "epoch": 0.94, "learning_rate": 8.934146313291425e-06, "loss": 1.8379, "step": 4639 }, { "epoch": 0.94, "learning_rate": 8.872395169725156e-06, "loss": 1.834, "step": 4640 }, { "epoch": 0.94, "learning_rate": 8.810856262629064e-06, "loss": 1.7988, "step": 4641 }, { "epoch": 0.94, "learning_rate": 8.749529618596485e-06, "loss": 1.8643, "step": 4642 }, { "epoch": 0.94, "learning_rate": 8.68841526412939e-06, "loss": 1.7891, "step": 4643 }, { "epoch": 0.94, "learning_rate": 8.627513225637651e-06, "loss": 1.9717, "step": 4644 }, { "epoch": 0.94, "learning_rate": 8.56682352943966e-06, "loss": 1.9082, "step": 4645 }, { "epoch": 0.94, "learning_rate": 8.506346201761828e-06, "loss": 1.7881, "step": 4646 }, { "epoch": 0.94, "learning_rate": 8.44608126873897e-06, "loss": 1.8564, "step": 4647 }, { "epoch": 0.94, "learning_rate": 8.386028756414089e-06, "loss": 1.7744, "step": 4648 }, { "epoch": 0.94, "learning_rate": 8.326188690738257e-06, "loss": 1.877, "step": 4649 }, { "epoch": 0.94, "learning_rate": 8.266561097570902e-06, "loss": 1.8057, "step": 4650 }, { "epoch": 0.94, "learning_rate": 8.207146002679522e-06, "loss": 1.79, "step": 4651 }, { "epoch": 0.94, "learning_rate": 8.147943431739801e-06, "loss": 1.7832, "step": 4652 }, { "epoch": 0.94, "learning_rate": 8.088953410335609e-06, "loss": 1.8828, "step": 4653 }, { "epoch": 0.94, "learning_rate": 8.030175963959108e-06, "loss": 1.8604, "step": 4654 }, { "epoch": 0.94, "learning_rate": 7.971611118010258e-06, "loss": 1.8301, "step": 4655 }, { "epoch": 0.94, "learning_rate": 7.913258897797481e-06, "loss": 1.7783, "step": 4656 }, { "epoch": 0.95, "learning_rate": 7.855119328537108e-06, "loss": 1.918, "step": 4657 }, { "epoch": 0.95, "learning_rate": 7.797192435353651e-06, "loss": 1.8848, "step": 4658 }, { "epoch": 0.95, "learning_rate": 7.739478243279696e-06, "loss": 1.8721, "step": 4659 }, { "epoch": 0.95, "learning_rate": 7.681976777255962e-06, "loss": 1.7979, "step": 4660 }, { "epoch": 0.95, "learning_rate": 7.624688062131124e-06, "loss": 1.8096, "step": 4661 }, { "epoch": 0.95, "learning_rate": 7.5676121226621594e-06, "loss": 1.8076, "step": 4662 }, { "epoch": 0.95, "learning_rate": 7.5107489835137825e-06, "loss": 1.8145, "step": 4663 }, { "epoch": 0.95, "learning_rate": 7.454098669258891e-06, "loss": 1.8516, "step": 4664 }, { "epoch": 0.95, "learning_rate": 7.397661204378514e-06, "loss": 1.8223, "step": 4665 }, { "epoch": 0.95, "learning_rate": 7.341436613261532e-06, "loss": 1.8662, "step": 4666 }, { "epoch": 0.95, "learning_rate": 7.285424920204953e-06, "loss": 1.8564, "step": 4667 }, { "epoch": 0.95, "learning_rate": 7.229626149413693e-06, "loss": 1.9404, "step": 4668 }, { "epoch": 0.95, "learning_rate": 7.174040325000797e-06, "loss": 1.8213, "step": 4669 }, { "epoch": 0.95, "learning_rate": 7.118667470987106e-06, "loss": 1.8037, "step": 4670 }, { "epoch": 0.95, "learning_rate": 7.063507611301423e-06, "loss": 1.875, "step": 4671 }, { "epoch": 0.95, "learning_rate": 7.008560769780792e-06, "loss": 1.8184, "step": 4672 }, { "epoch": 0.95, "learning_rate": 6.953826970169886e-06, "loss": 1.8447, "step": 4673 }, { "epoch": 0.95, "learning_rate": 6.8993062361213946e-06, "loss": 1.7588, "step": 4674 }, { "epoch": 0.95, "learning_rate": 6.844998591196139e-06, "loss": 1.8037, "step": 4675 }, { "epoch": 0.95, "learning_rate": 6.790904058862568e-06, "loss": 1.8125, "step": 4676 }, { "epoch": 0.95, "learning_rate": 6.737022662497094e-06, "loss": 1.7773, "step": 4677 }, { "epoch": 0.95, "learning_rate": 6.683354425384259e-06, "loss": 1.8926, "step": 4678 }, { "epoch": 0.95, "learning_rate": 6.629899370716231e-06, "loss": 1.7744, "step": 4679 }, { "epoch": 0.95, "learning_rate": 6.576657521593088e-06, "loss": 1.8984, "step": 4680 }, { "epoch": 0.95, "learning_rate": 6.523628901022927e-06, "loss": 1.7324, "step": 4681 }, { "epoch": 0.95, "learning_rate": 6.470813531921527e-06, "loss": 1.8857, "step": 4682 }, { "epoch": 0.95, "learning_rate": 6.418211437112631e-06, "loss": 1.8887, "step": 4683 }, { "epoch": 0.95, "learning_rate": 6.365822639327723e-06, "loss": 1.8584, "step": 4684 }, { "epoch": 0.95, "learning_rate": 6.313647161206193e-06, "loss": 1.9355, "step": 4685 }, { "epoch": 0.95, "learning_rate": 6.261685025295283e-06, "loss": 1.8711, "step": 4686 }, { "epoch": 0.95, "learning_rate": 6.209936254049808e-06, "loss": 1.7725, "step": 4687 }, { "epoch": 0.95, "learning_rate": 6.158400869832604e-06, "loss": 1.8506, "step": 4688 }, { "epoch": 0.95, "learning_rate": 6.107078894914353e-06, "loss": 1.8721, "step": 4689 }, { "epoch": 0.95, "learning_rate": 6.055970351473261e-06, "loss": 1.7637, "step": 4690 }, { "epoch": 0.95, "learning_rate": 6.005075261595494e-06, "loss": 1.8223, "step": 4691 }, { "epoch": 0.95, "learning_rate": 5.954393647274958e-06, "loss": 1.8936, "step": 4692 }, { "epoch": 0.95, "learning_rate": 5.903925530413135e-06, "loss": 1.7568, "step": 4693 }, { "epoch": 0.95, "learning_rate": 5.853670932819466e-06, "loss": 1.9336, "step": 4694 }, { "epoch": 0.95, "learning_rate": 5.803629876211025e-06, "loss": 1.8672, "step": 4695 }, { "epoch": 0.95, "learning_rate": 5.753802382212625e-06, "loss": 1.8389, "step": 4696 }, { "epoch": 0.95, "learning_rate": 5.7041884723567636e-06, "loss": 1.8633, "step": 4697 }, { "epoch": 0.95, "learning_rate": 5.65478816808368e-06, "loss": 1.8848, "step": 4698 }, { "epoch": 0.95, "learning_rate": 5.605601490741241e-06, "loss": 1.8057, "step": 4699 }, { "epoch": 0.95, "learning_rate": 5.556628461585001e-06, "loss": 1.8154, "step": 4700 }, { "epoch": 0.95, "learning_rate": 5.507869101778307e-06, "loss": 1.7988, "step": 4701 }, { "epoch": 0.95, "learning_rate": 5.459323432392083e-06, "loss": 1.9014, "step": 4702 }, { "epoch": 0.95, "learning_rate": 5.410991474404825e-06, "loss": 1.9053, "step": 4703 }, { "epoch": 0.95, "learning_rate": 5.362873248702827e-06, "loss": 1.792, "step": 4704 }, { "epoch": 0.95, "learning_rate": 5.314968776080009e-06, "loss": 1.8252, "step": 4705 }, { "epoch": 0.96, "learning_rate": 5.267278077237758e-06, "loss": 1.8076, "step": 4706 }, { "epoch": 0.96, "learning_rate": 5.219801172785255e-06, "loss": 1.8975, "step": 4707 }, { "epoch": 0.96, "learning_rate": 5.172538083239198e-06, "loss": 1.8838, "step": 4708 }, { "epoch": 0.96, "learning_rate": 5.125488829023916e-06, "loss": 1.7305, "step": 4709 }, { "epoch": 0.96, "learning_rate": 5.078653430471425e-06, "loss": 1.8623, "step": 4710 }, { "epoch": 0.96, "learning_rate": 5.032031907821088e-06, "loss": 1.9043, "step": 4711 }, { "epoch": 0.96, "learning_rate": 4.985624281220014e-06, "loss": 1.8066, "step": 4712 }, { "epoch": 0.96, "learning_rate": 4.939430570722936e-06, "loss": 1.6982, "step": 4713 }, { "epoch": 0.96, "learning_rate": 4.893450796291998e-06, "loss": 1.8154, "step": 4714 }, { "epoch": 0.96, "learning_rate": 4.8476849777969735e-06, "loss": 1.8955, "step": 4715 }, { "epoch": 0.96, "learning_rate": 4.802133135015096e-06, "loss": 1.7881, "step": 4716 }, { "epoch": 0.96, "learning_rate": 4.756795287631288e-06, "loss": 1.8086, "step": 4717 }, { "epoch": 0.96, "learning_rate": 4.71167145523782e-06, "loss": 1.8828, "step": 4718 }, { "epoch": 0.96, "learning_rate": 4.666761657334595e-06, "loss": 1.835, "step": 4719 }, { "epoch": 0.96, "learning_rate": 4.622065913329032e-06, "loss": 1.9336, "step": 4720 }, { "epoch": 0.96, "learning_rate": 4.5775842425359595e-06, "loss": 1.9033, "step": 4721 }, { "epoch": 0.96, "learning_rate": 4.533316664177667e-06, "loss": 1.8701, "step": 4722 }, { "epoch": 0.96, "learning_rate": 4.48926319738413e-06, "loss": 1.8154, "step": 4723 }, { "epoch": 0.96, "learning_rate": 4.445423861192566e-06, "loss": 1.7764, "step": 4724 }, { "epoch": 0.96, "learning_rate": 4.4017986745478186e-06, "loss": 1.8652, "step": 4725 }, { "epoch": 0.96, "learning_rate": 4.358387656302143e-06, "loss": 1.7852, "step": 4726 }, { "epoch": 0.96, "learning_rate": 4.315190825215143e-06, "loss": 1.7578, "step": 4727 }, { "epoch": 0.96, "learning_rate": 4.272208199953997e-06, "loss": 1.7891, "step": 4728 }, { "epoch": 0.96, "learning_rate": 4.229439799093293e-06, "loss": 1.8408, "step": 4729 }, { "epoch": 0.96, "learning_rate": 4.186885641115023e-06, "loss": 1.7646, "step": 4730 }, { "epoch": 0.96, "learning_rate": 4.144545744408535e-06, "loss": 1.9521, "step": 4731 }, { "epoch": 0.96, "learning_rate": 4.1024201272706894e-06, "loss": 1.7705, "step": 4732 }, { "epoch": 0.96, "learning_rate": 4.06050880790565e-06, "loss": 1.835, "step": 4733 }, { "epoch": 0.96, "learning_rate": 4.018811804425093e-06, "loss": 1.8379, "step": 4734 }, { "epoch": 0.96, "learning_rate": 3.97732913484794e-06, "loss": 1.8184, "step": 4735 }, { "epoch": 0.96, "learning_rate": 3.936060817100628e-06, "loss": 1.8408, "step": 4736 }, { "epoch": 0.96, "learning_rate": 3.8950068690168375e-06, "loss": 1.8594, "step": 4737 }, { "epoch": 0.96, "learning_rate": 3.8541673083377085e-06, "loss": 1.8457, "step": 4738 }, { "epoch": 0.96, "learning_rate": 3.813542152711735e-06, "loss": 1.8516, "step": 4739 }, { "epoch": 0.96, "learning_rate": 3.773131419694653e-06, "loss": 1.8096, "step": 4740 }, { "epoch": 0.96, "learning_rate": 3.732935126749604e-06, "loss": 1.7656, "step": 4741 }, { "epoch": 0.96, "learning_rate": 3.692953291247081e-06, "loss": 1.8389, "step": 4742 }, { "epoch": 0.96, "learning_rate": 3.6531859304649305e-06, "loss": 1.9375, "step": 4743 }, { "epoch": 0.96, "learning_rate": 3.613633061588184e-06, "loss": 1.874, "step": 4744 }, { "epoch": 0.96, "learning_rate": 3.5742947017092795e-06, "loss": 1.79, "step": 4745 }, { "epoch": 0.96, "learning_rate": 3.535170867828008e-06, "loss": 1.7988, "step": 4746 }, { "epoch": 0.96, "learning_rate": 3.496261576851345e-06, "loss": 1.751, "step": 4747 }, { "epoch": 0.96, "learning_rate": 3.457566845593618e-06, "loss": 1.8193, "step": 4748 }, { "epoch": 0.96, "learning_rate": 3.419086690776396e-06, "loss": 1.8418, "step": 4749 }, { "epoch": 0.96, "learning_rate": 3.3808211290284885e-06, "loss": 1.957, "step": 4750 }, { "epoch": 0.96, "learning_rate": 3.342770176886112e-06, "loss": 1.8799, "step": 4751 }, { "epoch": 0.96, "learning_rate": 3.304933850792613e-06, "loss": 1.9141, "step": 4752 }, { "epoch": 0.96, "learning_rate": 3.2673121670986906e-06, "loss": 1.7852, "step": 4753 }, { "epoch": 0.96, "learning_rate": 3.2299051420620617e-06, "loss": 1.7363, "step": 4754 }, { "epoch": 0.97, "learning_rate": 3.1927127918479624e-06, "loss": 1.7734, "step": 4755 }, { "epoch": 0.97, "learning_rate": 3.1557351325287587e-06, "loss": 1.957, "step": 4756 }, { "epoch": 0.97, "learning_rate": 3.1189721800839456e-06, "loss": 1.8057, "step": 4757 }, { "epoch": 0.97, "learning_rate": 3.08242395040037e-06, "loss": 1.8975, "step": 4758 }, { "epoch": 0.97, "learning_rate": 3.0460904592719553e-06, "loss": 1.8604, "step": 4759 }, { "epoch": 0.97, "learning_rate": 3.0099717223999733e-06, "loss": 1.8301, "step": 4760 }, { "epoch": 0.97, "learning_rate": 2.9740677553928285e-06, "loss": 1.9141, "step": 4761 }, { "epoch": 0.97, "learning_rate": 2.9383785737659984e-06, "loss": 1.8643, "step": 4762 }, { "epoch": 0.97, "learning_rate": 2.9029041929423684e-06, "loss": 1.876, "step": 4763 }, { "epoch": 0.97, "learning_rate": 2.8676446282517864e-06, "loss": 1.9043, "step": 4764 }, { "epoch": 0.97, "learning_rate": 2.8325998949314534e-06, "loss": 1.9541, "step": 4765 }, { "epoch": 0.97, "learning_rate": 2.797770008125533e-06, "loss": 1.8877, "step": 4766 }, { "epoch": 0.97, "learning_rate": 2.763154982885541e-06, "loss": 1.7559, "step": 4767 }, { "epoch": 0.97, "learning_rate": 2.7287548341700663e-06, "loss": 1.793, "step": 4768 }, { "epoch": 0.97, "learning_rate": 2.694569576844774e-06, "loss": 1.8936, "step": 4769 }, { "epoch": 0.97, "learning_rate": 2.660599225682514e-06, "loss": 1.8086, "step": 4770 }, { "epoch": 0.97, "learning_rate": 2.6268437953633205e-06, "loss": 1.8281, "step": 4771 }, { "epoch": 0.97, "learning_rate": 2.5933033004743032e-06, "loss": 1.7832, "step": 4772 }, { "epoch": 0.97, "learning_rate": 2.559977755509646e-06, "loss": 1.8291, "step": 4773 }, { "epoch": 0.97, "learning_rate": 2.5268671748707173e-06, "loss": 1.8955, "step": 4774 }, { "epoch": 0.97, "learning_rate": 2.4939715728659607e-06, "loss": 1.8096, "step": 4775 }, { "epoch": 0.97, "learning_rate": 2.4612909637109495e-06, "loss": 1.7363, "step": 4776 }, { "epoch": 0.97, "learning_rate": 2.4288253615282753e-06, "loss": 1.7256, "step": 4777 }, { "epoch": 0.97, "learning_rate": 2.3965747803477155e-06, "loss": 1.8564, "step": 4778 }, { "epoch": 0.97, "learning_rate": 2.364539234106011e-06, "loss": 1.6895, "step": 4779 }, { "epoch": 0.97, "learning_rate": 2.332718736647088e-06, "loss": 1.8682, "step": 4780 }, { "epoch": 0.97, "learning_rate": 2.301113301721891e-06, "loss": 1.876, "step": 4781 }, { "epoch": 0.97, "learning_rate": 2.2697229429883837e-06, "loss": 1.8271, "step": 4782 }, { "epoch": 0.97, "learning_rate": 2.238547674011715e-06, "loss": 1.8115, "step": 4783 }, { "epoch": 0.97, "learning_rate": 2.2075875082639417e-06, "loss": 1.8242, "step": 4784 }, { "epoch": 0.97, "learning_rate": 2.1768424591241954e-06, "loss": 1.7559, "step": 4785 }, { "epoch": 0.97, "learning_rate": 2.1463125398787365e-06, "loss": 1.7783, "step": 4786 }, { "epoch": 0.97, "learning_rate": 2.115997763720845e-06, "loss": 1.8848, "step": 4787 }, { "epoch": 0.97, "learning_rate": 2.085898143750653e-06, "loss": 1.7803, "step": 4788 }, { "epoch": 0.97, "learning_rate": 2.0560136929755893e-06, "loss": 1.793, "step": 4789 }, { "epoch": 0.97, "learning_rate": 2.026344424309823e-06, "loss": 1.876, "step": 4790 }, { "epoch": 0.97, "learning_rate": 1.996890350574765e-06, "loss": 1.8701, "step": 4791 }, { "epoch": 0.97, "learning_rate": 1.967651484498734e-06, "loss": 1.7773, "step": 4792 }, { "epoch": 0.97, "learning_rate": 1.9386278387169553e-06, "loss": 1.7881, "step": 4793 }, { "epoch": 0.97, "learning_rate": 1.9098194257718414e-06, "loss": 1.7402, "step": 4794 }, { "epoch": 0.97, "learning_rate": 1.881226258112656e-06, "loss": 1.9521, "step": 4795 }, { "epoch": 0.97, "learning_rate": 1.8528483480956814e-06, "loss": 1.8262, "step": 4796 }, { "epoch": 0.97, "learning_rate": 1.824685707984164e-06, "loss": 1.8301, "step": 4797 }, { "epoch": 0.97, "learning_rate": 1.7967383499483681e-06, "loss": 1.8242, "step": 4798 }, { "epoch": 0.97, "learning_rate": 1.7690062860654666e-06, "loss": 1.8174, "step": 4799 }, { "epoch": 0.97, "learning_rate": 1.7414895283197063e-06, "loss": 1.8438, "step": 4800 }, { "epoch": 0.97, "learning_rate": 1.714188088602131e-06, "loss": 1.8818, "step": 4801 }, { "epoch": 0.97, "learning_rate": 1.6871019787108587e-06, "loss": 1.8135, "step": 4802 }, { "epoch": 0.97, "learning_rate": 1.6602312103508598e-06, "loss": 1.8691, "step": 4803 }, { "epoch": 0.98, "learning_rate": 1.633575795134179e-06, "loss": 1.7725, "step": 4804 }, { "epoch": 0.98, "learning_rate": 1.6071357445796576e-06, "loss": 1.916, "step": 4805 }, { "epoch": 0.98, "learning_rate": 1.5809110701131557e-06, "loss": 1.9189, "step": 4806 }, { "epoch": 0.98, "learning_rate": 1.5549017830674418e-06, "loss": 1.9463, "step": 4807 }, { "epoch": 0.98, "learning_rate": 1.5291078946821912e-06, "loss": 1.8105, "step": 4808 }, { "epoch": 0.98, "learning_rate": 1.503529416103988e-06, "loss": 1.9365, "step": 4809 }, { "epoch": 0.98, "learning_rate": 1.4781663583863792e-06, "loss": 1.7656, "step": 4810 }, { "epoch": 0.98, "learning_rate": 1.4530187324897082e-06, "loss": 1.875, "step": 4811 }, { "epoch": 0.98, "learning_rate": 1.4280865492814487e-06, "loss": 1.9717, "step": 4812 }, { "epoch": 0.98, "learning_rate": 1.4033698195357048e-06, "loss": 1.9414, "step": 4813 }, { "epoch": 0.98, "learning_rate": 1.3788685539335989e-06, "loss": 1.9326, "step": 4814 }, { "epoch": 0.98, "learning_rate": 1.3545827630632168e-06, "loss": 1.7832, "step": 4815 }, { "epoch": 0.98, "learning_rate": 1.3305124574193306e-06, "loss": 1.8076, "step": 4816 }, { "epoch": 0.98, "learning_rate": 1.3066576474038417e-06, "loss": 1.876, "step": 4817 }, { "epoch": 0.98, "learning_rate": 1.2830183433252818e-06, "loss": 1.8877, "step": 4818 }, { "epoch": 0.98, "learning_rate": 1.2595945553992573e-06, "loss": 1.8369, "step": 4819 }, { "epoch": 0.98, "learning_rate": 1.2363862937481152e-06, "loss": 1.9229, "step": 4820 }, { "epoch": 0.98, "learning_rate": 1.2133935684010556e-06, "loss": 1.9248, "step": 4821 }, { "epoch": 0.98, "learning_rate": 1.1906163892942967e-06, "loss": 1.8359, "step": 4822 }, { "epoch": 0.98, "learning_rate": 1.1680547662706875e-06, "loss": 1.8047, "step": 4823 }, { "epoch": 0.98, "learning_rate": 1.14570870908004e-06, "loss": 1.8691, "step": 4824 }, { "epoch": 0.98, "learning_rate": 1.123578227379074e-06, "loss": 1.8223, "step": 4825 }, { "epoch": 0.98, "learning_rate": 1.1016633307312507e-06, "loss": 1.7725, "step": 4826 }, { "epoch": 0.98, "learning_rate": 1.0799640286068834e-06, "loss": 1.8799, "step": 4827 }, { "epoch": 0.98, "learning_rate": 1.0584803303831381e-06, "loss": 1.9551, "step": 4828 }, { "epoch": 0.98, "learning_rate": 1.0372122453440324e-06, "loss": 1.9004, "step": 4829 }, { "epoch": 0.98, "learning_rate": 1.016159782680326e-06, "loss": 1.8145, "step": 4830 }, { "epoch": 0.98, "learning_rate": 9.95322951489741e-07, "loss": 1.8564, "step": 4831 }, { "epoch": 0.98, "learning_rate": 9.747017607766862e-07, "loss": 1.9453, "step": 4832 }, { "epoch": 0.98, "learning_rate": 9.542962194524217e-07, "loss": 1.8877, "step": 4833 }, { "epoch": 0.98, "learning_rate": 9.341063363349501e-07, "loss": 1.7793, "step": 4834 }, { "epoch": 0.98, "learning_rate": 9.141321201492914e-07, "loss": 1.7559, "step": 4835 }, { "epoch": 0.98, "learning_rate": 8.943735795270969e-07, "loss": 1.9229, "step": 4836 }, { "epoch": 0.98, "learning_rate": 8.748307230067587e-07, "loss": 1.6689, "step": 4837 }, { "epoch": 0.98, "learning_rate": 8.555035590336324e-07, "loss": 1.8584, "step": 4838 }, { "epoch": 0.98, "learning_rate": 8.363920959597038e-07, "loss": 1.8428, "step": 4839 }, { "epoch": 0.98, "learning_rate": 8.174963420439219e-07, "loss": 1.8496, "step": 4840 }, { "epoch": 0.98, "learning_rate": 7.98816305451866e-07, "loss": 1.8516, "step": 4841 }, { "epoch": 0.98, "learning_rate": 7.803519942559123e-07, "loss": 1.7734, "step": 4842 }, { "epoch": 0.98, "learning_rate": 7.621034164353446e-07, "loss": 1.8643, "step": 4843 }, { "epoch": 0.98, "learning_rate": 7.440705798760217e-07, "loss": 1.9531, "step": 4844 }, { "epoch": 0.98, "learning_rate": 7.262534923707654e-07, "loss": 1.7949, "step": 4845 }, { "epoch": 0.98, "learning_rate": 7.086521616190278e-07, "loss": 1.8965, "step": 4846 }, { "epoch": 0.98, "learning_rate": 6.91266595227058e-07, "loss": 1.8789, "step": 4847 }, { "epoch": 0.98, "learning_rate": 6.740968007079018e-07, "loss": 1.7422, "step": 4848 }, { "epoch": 0.98, "learning_rate": 6.571427854813461e-07, "loss": 1.8301, "step": 4849 }, { "epoch": 0.98, "learning_rate": 6.404045568738637e-07, "loss": 1.8105, "step": 4850 }, { "epoch": 0.98, "learning_rate": 6.238821221187796e-07, "loss": 1.874, "step": 4851 }, { "epoch": 0.98, "learning_rate": 6.075754883561601e-07, "loss": 1.8457, "step": 4852 }, { "epoch": 0.98, "learning_rate": 5.914846626326464e-07, "loss": 1.8232, "step": 4853 }, { "epoch": 0.99, "learning_rate": 5.756096519018428e-07, "loss": 1.8828, "step": 4854 }, { "epoch": 0.99, "learning_rate": 5.599504630239838e-07, "loss": 1.7539, "step": 4855 }, { "epoch": 0.99, "learning_rate": 5.445071027659898e-07, "loss": 1.874, "step": 4856 }, { "epoch": 0.99, "learning_rate": 5.29279577801689e-07, "loss": 1.6904, "step": 4857 }, { "epoch": 0.99, "learning_rate": 5.142678947114288e-07, "loss": 1.7881, "step": 4858 }, { "epoch": 0.99, "learning_rate": 4.994720599824087e-07, "loss": 1.9111, "step": 4859 }, { "epoch": 0.99, "learning_rate": 4.848920800085143e-07, "loss": 1.8643, "step": 4860 }, { "epoch": 0.99, "learning_rate": 4.7052796109031683e-07, "loss": 1.7578, "step": 4861 }, { "epoch": 0.99, "learning_rate": 4.5637970943523953e-07, "loss": 1.9434, "step": 4862 }, { "epoch": 0.99, "learning_rate": 4.4244733115722525e-07, "loss": 1.8271, "step": 4863 }, { "epoch": 0.99, "learning_rate": 4.2873083227706887e-07, "loss": 1.8682, "step": 4864 }, { "epoch": 0.99, "learning_rate": 4.152302187222512e-07, "loss": 1.7979, "step": 4865 }, { "epoch": 0.99, "learning_rate": 4.0194549632693867e-07, "loss": 1.9092, "step": 4866 }, { "epoch": 0.99, "learning_rate": 3.888766708319835e-07, "loss": 1.8604, "step": 4867 }, { "epoch": 0.99, "learning_rate": 3.7602374788497927e-07, "loss": 1.7676, "step": 4868 }, { "epoch": 0.99, "learning_rate": 3.633867330402052e-07, "loss": 1.7734, "step": 4869 }, { "epoch": 0.99, "learning_rate": 3.509656317585708e-07, "loss": 1.7607, "step": 4870 }, { "epoch": 0.99, "learning_rate": 3.387604494078933e-07, "loss": 1.8037, "step": 4871 }, { "epoch": 0.99, "learning_rate": 3.267711912623983e-07, "loss": 1.8232, "step": 4872 }, { "epoch": 0.99, "learning_rate": 3.14997862503219e-07, "loss": 1.8477, "step": 4873 }, { "epoch": 0.99, "learning_rate": 3.034404682180081e-07, "loss": 1.8438, "step": 4874 }, { "epoch": 0.99, "learning_rate": 2.9209901340132574e-07, "loss": 1.8564, "step": 4875 }, { "epoch": 0.99, "learning_rate": 2.809735029541405e-07, "loss": 1.7773, "step": 4876 }, { "epoch": 0.99, "learning_rate": 2.700639416843287e-07, "loss": 1.8408, "step": 4877 }, { "epoch": 0.99, "learning_rate": 2.593703343063414e-07, "loss": 1.8418, "step": 4878 }, { "epoch": 0.99, "learning_rate": 2.488926854413154e-07, "loss": 1.8652, "step": 4879 }, { "epoch": 0.99, "learning_rate": 2.3863099961707323e-07, "loss": 1.9023, "step": 4880 }, { "epoch": 0.99, "learning_rate": 2.2858528126812327e-07, "loss": 1.751, "step": 4881 }, { "epoch": 0.99, "learning_rate": 2.1875553473565956e-07, "loss": 1.8057, "step": 4882 }, { "epoch": 0.99, "learning_rate": 2.0914176426750642e-07, "loss": 1.917, "step": 4883 }, { "epoch": 0.99, "learning_rate": 1.9974397401811838e-07, "loss": 1.7793, "step": 4884 }, { "epoch": 0.99, "learning_rate": 1.905621680487468e-07, "loss": 1.8271, "step": 4885 }, { "epoch": 0.99, "learning_rate": 1.8159635032716227e-07, "loss": 1.7451, "step": 4886 }, { "epoch": 0.99, "learning_rate": 1.7284652472787654e-07, "loss": 1.8174, "step": 4887 }, { "epoch": 0.99, "learning_rate": 1.6431269503208723e-07, "loss": 1.8193, "step": 4888 }, { "epoch": 0.99, "learning_rate": 1.5599486492756664e-07, "loss": 1.7148, "step": 4889 }, { "epoch": 0.99, "learning_rate": 1.4789303800882835e-07, "loss": 1.8848, "step": 4890 }, { "epoch": 0.99, "learning_rate": 1.4000721777701618e-07, "loss": 1.7568, "step": 4891 }, { "epoch": 0.99, "learning_rate": 1.3233740763990422e-07, "loss": 1.7686, "step": 4892 }, { "epoch": 0.99, "learning_rate": 1.248836109118967e-07, "loss": 1.7725, "step": 4893 }, { "epoch": 0.99, "learning_rate": 1.1764583081408375e-07, "loss": 1.8115, "step": 4894 }, { "epoch": 0.99, "learning_rate": 1.1062407047429668e-07, "loss": 1.8242, "step": 4895 }, { "epoch": 0.99, "learning_rate": 1.0381833292683052e-07, "loss": 1.8799, "step": 4896 }, { "epoch": 0.99, "learning_rate": 9.722862111277708e-08, "loss": 1.8896, "step": 4897 }, { "epoch": 0.99, "learning_rate": 9.085493787980293e-08, "loss": 1.8711, "step": 4898 }, { "epoch": 0.99, "learning_rate": 8.469728598220483e-08, "loss": 1.8135, "step": 4899 }, { "epoch": 0.99, "learning_rate": 7.875566808107637e-08, "loss": 1.8672, "step": 4900 }, { "epoch": 0.99, "learning_rate": 7.30300867439193e-08, "loss": 1.8516, "step": 4901 }, { "epoch": 0.99, "learning_rate": 6.752054444497669e-08, "loss": 1.833, "step": 4902 }, { "epoch": 1.0, "learning_rate": 6.22270435652883e-08, "loss": 1.7939, "step": 4903 }, { "epoch": 1.0, "learning_rate": 5.714958639224666e-08, "loss": 1.7266, "step": 4904 }, { "epoch": 1.0, "learning_rate": 5.228817512015205e-08, "loss": 1.8477, "step": 4905 }, { "epoch": 1.0, "learning_rate": 4.7642811849712974e-08, "loss": 1.8848, "step": 4906 }, { "epoch": 1.0, "learning_rate": 4.3213498588434706e-08, "loss": 1.8584, "step": 4907 }, { "epoch": 1.0, "learning_rate": 3.900023725039725e-08, "loss": 1.8008, "step": 4908 }, { "epoch": 1.0, "learning_rate": 3.500302965636637e-08, "loss": 1.8379, "step": 4909 }, { "epoch": 1.0, "learning_rate": 3.122187753362704e-08, "loss": 1.7432, "step": 4910 }, { "epoch": 1.0, "learning_rate": 2.7656782516205513e-08, "loss": 1.8809, "step": 4911 }, { "epoch": 1.0, "learning_rate": 2.4307746144702768e-08, "loss": 1.8721, "step": 4912 }, { "epoch": 1.0, "learning_rate": 2.1174769866461053e-08, "loss": 1.8232, "step": 4913 }, { "epoch": 1.0, "learning_rate": 1.8257855035230807e-08, "loss": 1.6973, "step": 4914 }, { "epoch": 1.0, "learning_rate": 1.5557002911670282e-08, "loss": 1.8818, "step": 4915 }, { "epoch": 1.0, "learning_rate": 1.3072214662845916e-08, "loss": 1.8936, "step": 4916 }, { "epoch": 1.0, "learning_rate": 1.080349136250991e-08, "loss": 1.8418, "step": 4917 }, { "epoch": 1.0, "learning_rate": 8.750833991155727e-09, "loss": 1.9121, "step": 4918 }, { "epoch": 1.0, "learning_rate": 6.914243435796053e-09, "loss": 1.8457, "step": 4919 }, { "epoch": 1.0, "learning_rate": 5.2937204900738165e-09, "loss": 1.8447, "step": 4920 }, { "epoch": 1.0, "learning_rate": 3.889265854317703e-09, "loss": 1.748, "step": 4921 }, { "epoch": 1.0, "learning_rate": 2.7008801353756163e-09, "loss": 1.876, "step": 4922 }, { "epoch": 1.0, "learning_rate": 1.7285638468922393e-09, "loss": 1.7412, "step": 4923 }, { "epoch": 1.0, "learning_rate": 9.723174089759645e-10, "loss": 1.8496, "step": 4924 }, { "epoch": 1.0, "learning_rate": 4.321411484764504e-10, "loss": 1.8525, "step": 4925 }, { "epoch": 1.0, "learning_rate": 1.0803529876257656e-10, "loss": 1.8291, "step": 4926 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 1.7939, "step": 4927 }, { "epoch": 1.0, "step": 4927, "total_flos": 3.748687970375303e+18, "train_loss": 2.0709812480972194, "train_runtime": 81625.4527, "train_samples_per_second": 15.45, "train_steps_per_second": 0.06 } ], "max_steps": 4927, "num_train_epochs": 1, "total_flos": 3.748687970375303e+18, "trial_name": null, "trial_params": null }