|
{ |
|
"best_metric": 0.9602224123182207, |
|
"best_model_checkpoint": "vit-msn-small-lateral_flow_ivalidation_green/checkpoint-32", |
|
"epoch": 92.3076923076923, |
|
"eval_steps": 500, |
|
"global_step": 600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"eval_accuracy": 0.9238665526090676, |
|
"eval_loss": 0.5034469366073608, |
|
"eval_runtime": 9.6118, |
|
"eval_samples_per_second": 243.243, |
|
"eval_steps_per_second": 3.849, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"grad_norm": 13.861084938049316, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.704, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9409751924721984, |
|
"eval_loss": 0.45238828659057617, |
|
"eval_runtime": 9.8069, |
|
"eval_samples_per_second": 238.404, |
|
"eval_steps_per_second": 3.773, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 2.9230769230769234, |
|
"eval_accuracy": 0.9593669803250642, |
|
"eval_loss": 0.26334497332572937, |
|
"eval_runtime": 9.5224, |
|
"eval_samples_per_second": 245.527, |
|
"eval_steps_per_second": 3.886, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 3.076923076923077, |
|
"grad_norm": 5.300178527832031, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.505, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8092386655260907, |
|
"eval_loss": 0.4748455882072449, |
|
"eval_runtime": 9.8908, |
|
"eval_samples_per_second": 236.381, |
|
"eval_steps_per_second": 3.741, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 4.615384615384615, |
|
"grad_norm": 14.291367530822754, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.4456, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 4.923076923076923, |
|
"eval_accuracy": 0.9602224123182207, |
|
"eval_loss": 0.29173824191093445, |
|
"eval_runtime": 10.0407, |
|
"eval_samples_per_second": 232.852, |
|
"eval_steps_per_second": 3.685, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9221556886227545, |
|
"eval_loss": 0.2621428668498993, |
|
"eval_runtime": 9.9631, |
|
"eval_samples_per_second": 234.666, |
|
"eval_steps_per_second": 3.714, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 6.153846153846154, |
|
"grad_norm": 8.197896957397461, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.3908, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 6.923076923076923, |
|
"eval_accuracy": 0.8190761334473909, |
|
"eval_loss": 0.4519110918045044, |
|
"eval_runtime": 9.949, |
|
"eval_samples_per_second": 234.998, |
|
"eval_steps_per_second": 3.719, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 7.6923076923076925, |
|
"grad_norm": 7.590042591094971, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.3628, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8622754491017964, |
|
"eval_loss": 0.4092731475830078, |
|
"eval_runtime": 10.096, |
|
"eval_samples_per_second": 231.578, |
|
"eval_steps_per_second": 3.665, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 8.923076923076923, |
|
"eval_accuracy": 0.935414884516681, |
|
"eval_loss": 0.2705248296260834, |
|
"eval_runtime": 10.1632, |
|
"eval_samples_per_second": 230.046, |
|
"eval_steps_per_second": 3.641, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 9.23076923076923, |
|
"grad_norm": 10.925556182861328, |
|
"learning_rate": 5e-05, |
|
"loss": 0.372, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8545765611633875, |
|
"eval_loss": 0.41367459297180176, |
|
"eval_runtime": 10.0011, |
|
"eval_samples_per_second": 233.774, |
|
"eval_steps_per_second": 3.7, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 10.76923076923077, |
|
"grad_norm": 8.855840682983398, |
|
"learning_rate": 4.9074074074074075e-05, |
|
"loss": 0.36, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 10.923076923076923, |
|
"eval_accuracy": 0.8815226689478186, |
|
"eval_loss": 0.34931161999702454, |
|
"eval_runtime": 9.9523, |
|
"eval_samples_per_second": 234.921, |
|
"eval_steps_per_second": 3.718, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9456800684345594, |
|
"eval_loss": 0.21901701390743256, |
|
"eval_runtime": 9.6913, |
|
"eval_samples_per_second": 241.248, |
|
"eval_steps_per_second": 3.818, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 12.307692307692308, |
|
"grad_norm": 7.141057014465332, |
|
"learning_rate": 4.814814814814815e-05, |
|
"loss": 0.36, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 12.923076923076923, |
|
"eval_accuracy": 0.9033361847733106, |
|
"eval_loss": 0.3190420866012573, |
|
"eval_runtime": 9.7798, |
|
"eval_samples_per_second": 239.064, |
|
"eval_steps_per_second": 3.783, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 13.846153846153847, |
|
"grad_norm": 11.677677154541016, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 0.3363, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.894781864841745, |
|
"eval_loss": 0.337951123714447, |
|
"eval_runtime": 9.7204, |
|
"eval_samples_per_second": 240.525, |
|
"eval_steps_per_second": 3.806, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 14.923076923076923, |
|
"eval_accuracy": 0.8982035928143712, |
|
"eval_loss": 0.3342379331588745, |
|
"eval_runtime": 9.9394, |
|
"eval_samples_per_second": 235.226, |
|
"eval_steps_per_second": 3.723, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 15.384615384615385, |
|
"grad_norm": 3.7460684776306152, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.327, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8327630453378957, |
|
"eval_loss": 0.4211990237236023, |
|
"eval_runtime": 9.8418, |
|
"eval_samples_per_second": 237.559, |
|
"eval_steps_per_second": 3.759, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 16.923076923076923, |
|
"grad_norm": 12.34093952178955, |
|
"learning_rate": 4.5370370370370374e-05, |
|
"loss": 0.3257, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 16.923076923076923, |
|
"eval_accuracy": 0.7844311377245509, |
|
"eval_loss": 0.5167170763015747, |
|
"eval_runtime": 9.8234, |
|
"eval_samples_per_second": 238.004, |
|
"eval_steps_per_second": 3.767, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7275449101796407, |
|
"eval_loss": 0.5847879648208618, |
|
"eval_runtime": 9.927, |
|
"eval_samples_per_second": 235.518, |
|
"eval_steps_per_second": 3.727, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 18.46153846153846, |
|
"grad_norm": 5.46023416519165, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.3175, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 18.923076923076923, |
|
"eval_accuracy": 0.8336184773310522, |
|
"eval_loss": 0.4090871810913086, |
|
"eval_runtime": 10.1427, |
|
"eval_samples_per_second": 230.511, |
|
"eval_steps_per_second": 3.648, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 9.724111557006836, |
|
"learning_rate": 4.351851851851852e-05, |
|
"loss": 0.3377, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9161676646706587, |
|
"eval_loss": 0.28380292654037476, |
|
"eval_runtime": 10.1341, |
|
"eval_samples_per_second": 230.706, |
|
"eval_steps_per_second": 3.651, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 20.923076923076923, |
|
"eval_accuracy": 0.7262617621899059, |
|
"eval_loss": 0.6106137633323669, |
|
"eval_runtime": 9.9506, |
|
"eval_samples_per_second": 234.962, |
|
"eval_steps_per_second": 3.718, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 21.53846153846154, |
|
"grad_norm": 5.13586950302124, |
|
"learning_rate": 4.259259259259259e-05, |
|
"loss": 0.3129, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7164242942686057, |
|
"eval_loss": 0.6294828057289124, |
|
"eval_runtime": 9.9909, |
|
"eval_samples_per_second": 234.013, |
|
"eval_steps_per_second": 3.703, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 22.923076923076923, |
|
"eval_accuracy": 0.5932420872540634, |
|
"eval_loss": 0.7897723913192749, |
|
"eval_runtime": 9.9526, |
|
"eval_samples_per_second": 234.912, |
|
"eval_steps_per_second": 3.718, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 23.076923076923077, |
|
"grad_norm": 15.414055824279785, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.3138, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.4846022241231822, |
|
"eval_loss": 0.9407968521118164, |
|
"eval_runtime": 9.7597, |
|
"eval_samples_per_second": 239.556, |
|
"eval_steps_per_second": 3.791, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 24.615384615384617, |
|
"grad_norm": 3.9247374534606934, |
|
"learning_rate": 4.074074074074074e-05, |
|
"loss": 0.3106, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 24.923076923076923, |
|
"eval_accuracy": 0.8832335329341318, |
|
"eval_loss": 0.34852102398872375, |
|
"eval_runtime": 9.8091, |
|
"eval_samples_per_second": 238.35, |
|
"eval_steps_per_second": 3.772, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7865697177074422, |
|
"eval_loss": 0.5201271176338196, |
|
"eval_runtime": 9.9386, |
|
"eval_samples_per_second": 235.245, |
|
"eval_steps_per_second": 3.723, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 26.153846153846153, |
|
"grad_norm": 5.763908386230469, |
|
"learning_rate": 3.981481481481482e-05, |
|
"loss": 0.3157, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 26.923076923076923, |
|
"eval_accuracy": 0.6672369546621043, |
|
"eval_loss": 0.72103351354599, |
|
"eval_runtime": 9.9351, |
|
"eval_samples_per_second": 235.327, |
|
"eval_steps_per_second": 3.724, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 27.692307692307693, |
|
"grad_norm": 5.021239757537842, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.2896, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.6330196749358425, |
|
"eval_loss": 0.7980794906616211, |
|
"eval_runtime": 10.2592, |
|
"eval_samples_per_second": 227.892, |
|
"eval_steps_per_second": 3.607, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 28.923076923076923, |
|
"eval_accuracy": 0.6428571428571429, |
|
"eval_loss": 0.7667437791824341, |
|
"eval_runtime": 9.9249, |
|
"eval_samples_per_second": 235.57, |
|
"eval_steps_per_second": 3.728, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 29.23076923076923, |
|
"grad_norm": 8.59469985961914, |
|
"learning_rate": 3.7962962962962964e-05, |
|
"loss": 0.2867, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.6544054747647562, |
|
"eval_loss": 0.7686835527420044, |
|
"eval_runtime": 10.0746, |
|
"eval_samples_per_second": 232.069, |
|
"eval_steps_per_second": 3.673, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 30.76923076923077, |
|
"grad_norm": 5.601044654846191, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.2786, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 30.923076923076923, |
|
"eval_accuracy": 0.5209580838323353, |
|
"eval_loss": 1.1714286804199219, |
|
"eval_runtime": 9.9004, |
|
"eval_samples_per_second": 236.151, |
|
"eval_steps_per_second": 3.737, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.42728828058169377, |
|
"eval_loss": 1.1744341850280762, |
|
"eval_runtime": 9.8292, |
|
"eval_samples_per_second": 237.862, |
|
"eval_steps_per_second": 3.764, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 32.30769230769231, |
|
"grad_norm": 4.0939507484436035, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.2823, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 32.92307692307692, |
|
"eval_accuracy": 0.5444824636441403, |
|
"eval_loss": 0.9260274767875671, |
|
"eval_runtime": 9.8098, |
|
"eval_samples_per_second": 238.334, |
|
"eval_steps_per_second": 3.772, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 33.84615384615385, |
|
"grad_norm": 5.409052848815918, |
|
"learning_rate": 3.518518518518519e-05, |
|
"loss": 0.2864, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.6920444824636441, |
|
"eval_loss": 0.7139692902565002, |
|
"eval_runtime": 9.9272, |
|
"eval_samples_per_second": 235.514, |
|
"eval_steps_per_second": 3.727, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 34.92307692307692, |
|
"eval_accuracy": 0.7331052181351583, |
|
"eval_loss": 0.6098384857177734, |
|
"eval_runtime": 9.8226, |
|
"eval_samples_per_second": 238.024, |
|
"eval_steps_per_second": 3.767, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 35.38461538461539, |
|
"grad_norm": 3.7521326541900635, |
|
"learning_rate": 3.425925925925926e-05, |
|
"loss": 0.2707, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.6783575705731394, |
|
"eval_loss": 0.6992803812026978, |
|
"eval_runtime": 9.8614, |
|
"eval_samples_per_second": 237.087, |
|
"eval_steps_per_second": 3.752, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 36.92307692307692, |
|
"grad_norm": 5.1208319664001465, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.2921, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 36.92307692307692, |
|
"eval_accuracy": 0.6176218990590248, |
|
"eval_loss": 0.8719092607498169, |
|
"eval_runtime": 10.0768, |
|
"eval_samples_per_second": 232.019, |
|
"eval_steps_per_second": 3.672, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.6060735671514115, |
|
"eval_loss": 0.8336823582649231, |
|
"eval_runtime": 9.8195, |
|
"eval_samples_per_second": 238.098, |
|
"eval_steps_per_second": 3.768, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 38.46153846153846, |
|
"grad_norm": 6.667710304260254, |
|
"learning_rate": 3.240740740740741e-05, |
|
"loss": 0.2849, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 38.92307692307692, |
|
"eval_accuracy": 0.825491873396065, |
|
"eval_loss": 0.4395623505115509, |
|
"eval_runtime": 9.6605, |
|
"eval_samples_per_second": 242.015, |
|
"eval_steps_per_second": 3.83, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 5.09765625, |
|
"learning_rate": 3.148148148148148e-05, |
|
"loss": 0.2657, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.501710863986313, |
|
"eval_loss": 1.0981603860855103, |
|
"eval_runtime": 9.9306, |
|
"eval_samples_per_second": 235.433, |
|
"eval_steps_per_second": 3.726, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 40.92307692307692, |
|
"eval_accuracy": 0.5175363558597091, |
|
"eval_loss": 1.093379259109497, |
|
"eval_runtime": 9.7012, |
|
"eval_samples_per_second": 241.001, |
|
"eval_steps_per_second": 3.814, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 41.53846153846154, |
|
"grad_norm": 3.8766767978668213, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.2659, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.636869118905047, |
|
"eval_loss": 0.8629169464111328, |
|
"eval_runtime": 9.9076, |
|
"eval_samples_per_second": 235.98, |
|
"eval_steps_per_second": 3.734, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 42.92307692307692, |
|
"eval_accuracy": 0.41402908468776733, |
|
"eval_loss": 1.4602264165878296, |
|
"eval_runtime": 9.7024, |
|
"eval_samples_per_second": 240.972, |
|
"eval_steps_per_second": 3.813, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 43.07692307692308, |
|
"grad_norm": 7.324178695678711, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 0.2645, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.3421727972626176, |
|
"eval_loss": 1.9095213413238525, |
|
"eval_runtime": 10.1083, |
|
"eval_samples_per_second": 231.294, |
|
"eval_steps_per_second": 3.66, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 44.61538461538461, |
|
"grad_norm": 3.6502673625946045, |
|
"learning_rate": 2.8703703703703706e-05, |
|
"loss": 0.2424, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 44.92307692307692, |
|
"eval_accuracy": 0.43969204448246363, |
|
"eval_loss": 1.2180449962615967, |
|
"eval_runtime": 9.8511, |
|
"eval_samples_per_second": 237.334, |
|
"eval_steps_per_second": 3.756, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.6424294268605646, |
|
"eval_loss": 0.7686424255371094, |
|
"eval_runtime": 10.0098, |
|
"eval_samples_per_second": 233.572, |
|
"eval_steps_per_second": 3.696, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 46.15384615384615, |
|
"grad_norm": 4.265622138977051, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.2495, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 46.92307692307692, |
|
"eval_accuracy": 0.5795551753635586, |
|
"eval_loss": 0.9899386763572693, |
|
"eval_runtime": 9.9941, |
|
"eval_samples_per_second": 233.939, |
|
"eval_steps_per_second": 3.702, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 47.69230769230769, |
|
"grad_norm": 2.909935474395752, |
|
"learning_rate": 2.6851851851851855e-05, |
|
"loss": 0.2454, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.553464499572284, |
|
"eval_loss": 1.0290616750717163, |
|
"eval_runtime": 10.0955, |
|
"eval_samples_per_second": 231.587, |
|
"eval_steps_per_second": 3.665, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 48.92307692307692, |
|
"eval_accuracy": 0.6822070145423439, |
|
"eval_loss": 0.7534288167953491, |
|
"eval_runtime": 10.0774, |
|
"eval_samples_per_second": 232.004, |
|
"eval_steps_per_second": 3.672, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 49.23076923076923, |
|
"grad_norm": 6.560550212860107, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 0.2473, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.7091531223267751, |
|
"eval_loss": 0.6591421961784363, |
|
"eval_runtime": 10.0255, |
|
"eval_samples_per_second": 233.204, |
|
"eval_steps_per_second": 3.691, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 50.76923076923077, |
|
"grad_norm": 10.7844820022583, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2716, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 50.92307692307692, |
|
"eval_accuracy": 0.7455089820359282, |
|
"eval_loss": 0.58400559425354, |
|
"eval_runtime": 9.8847, |
|
"eval_samples_per_second": 236.527, |
|
"eval_steps_per_second": 3.743, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.47647562018819506, |
|
"eval_loss": 1.2430182695388794, |
|
"eval_runtime": 9.8852, |
|
"eval_samples_per_second": 236.515, |
|
"eval_steps_per_second": 3.743, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 52.30769230769231, |
|
"grad_norm": 7.81274938583374, |
|
"learning_rate": 2.4074074074074074e-05, |
|
"loss": 0.234, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 52.92307692307692, |
|
"eval_accuracy": 0.5145423438836613, |
|
"eval_loss": 1.299268126487732, |
|
"eval_runtime": 10.1543, |
|
"eval_samples_per_second": 230.247, |
|
"eval_steps_per_second": 3.644, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 53.84615384615385, |
|
"grad_norm": 8.652073860168457, |
|
"learning_rate": 2.314814814814815e-05, |
|
"loss": 0.2482, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.7172797262617622, |
|
"eval_loss": 0.6042024493217468, |
|
"eval_runtime": 10.1638, |
|
"eval_samples_per_second": 230.033, |
|
"eval_steps_per_second": 3.64, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 54.92307692307692, |
|
"eval_accuracy": 0.6026518391787853, |
|
"eval_loss": 0.8891559839248657, |
|
"eval_runtime": 9.9638, |
|
"eval_samples_per_second": 234.65, |
|
"eval_steps_per_second": 3.713, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 55.38461538461539, |
|
"grad_norm": 3.8207547664642334, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.2339, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.316082121471343, |
|
"eval_loss": 1.8545583486557007, |
|
"eval_runtime": 9.9886, |
|
"eval_samples_per_second": 234.066, |
|
"eval_steps_per_second": 3.704, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 56.92307692307692, |
|
"grad_norm": 4.086548328399658, |
|
"learning_rate": 2.1296296296296296e-05, |
|
"loss": 0.2461, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 56.92307692307692, |
|
"eval_accuracy": 0.5359281437125748, |
|
"eval_loss": 1.0858689546585083, |
|
"eval_runtime": 9.9977, |
|
"eval_samples_per_second": 233.854, |
|
"eval_steps_per_second": 3.701, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.6176218990590248, |
|
"eval_loss": 0.8690257668495178, |
|
"eval_runtime": 10.0183, |
|
"eval_samples_per_second": 233.374, |
|
"eval_steps_per_second": 3.693, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 58.46153846153846, |
|
"grad_norm": 4.676217079162598, |
|
"learning_rate": 2.037037037037037e-05, |
|
"loss": 0.2395, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 58.92307692307692, |
|
"eval_accuracy": 0.6693755346449958, |
|
"eval_loss": 0.7557449340820312, |
|
"eval_runtime": 9.868, |
|
"eval_samples_per_second": 236.928, |
|
"eval_steps_per_second": 3.75, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 7.573139667510986, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.2159, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.5701454234388366, |
|
"eval_loss": 1.053432822227478, |
|
"eval_runtime": 10.2535, |
|
"eval_samples_per_second": 228.02, |
|
"eval_steps_per_second": 3.609, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 60.92307692307692, |
|
"eval_accuracy": 0.5812660393498716, |
|
"eval_loss": 0.9855865240097046, |
|
"eval_runtime": 10.0776, |
|
"eval_samples_per_second": 231.999, |
|
"eval_steps_per_second": 3.672, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 61.53846153846154, |
|
"grad_norm": 4.489895820617676, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.2309, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.5500427715996579, |
|
"eval_loss": 0.9999585151672363, |
|
"eval_runtime": 9.7878, |
|
"eval_samples_per_second": 238.869, |
|
"eval_steps_per_second": 3.78, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 62.92307692307692, |
|
"eval_accuracy": 0.5179640718562875, |
|
"eval_loss": 1.1939594745635986, |
|
"eval_runtime": 9.8311, |
|
"eval_samples_per_second": 237.818, |
|
"eval_steps_per_second": 3.764, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 63.07692307692308, |
|
"grad_norm": 6.18975830078125, |
|
"learning_rate": 1.7592592592592595e-05, |
|
"loss": 0.2117, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.5153977758768178, |
|
"eval_loss": 1.1580592393875122, |
|
"eval_runtime": 10.0265, |
|
"eval_samples_per_second": 233.182, |
|
"eval_steps_per_second": 3.69, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 64.61538461538461, |
|
"grad_norm": 4.720950603485107, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.2307, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 64.92307692307692, |
|
"eval_accuracy": 0.5337895637296834, |
|
"eval_loss": 0.9987441897392273, |
|
"eval_runtime": 10.0605, |
|
"eval_samples_per_second": 232.395, |
|
"eval_steps_per_second": 3.678, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.5414884516680923, |
|
"eval_loss": 1.084990382194519, |
|
"eval_runtime": 9.9921, |
|
"eval_samples_per_second": 233.986, |
|
"eval_steps_per_second": 3.703, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 66.15384615384616, |
|
"grad_norm": 5.033910274505615, |
|
"learning_rate": 1.574074074074074e-05, |
|
"loss": 0.2068, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 66.92307692307692, |
|
"eval_accuracy": 0.6013686911890505, |
|
"eval_loss": 0.942755401134491, |
|
"eval_runtime": 9.7471, |
|
"eval_samples_per_second": 239.866, |
|
"eval_steps_per_second": 3.796, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 67.6923076923077, |
|
"grad_norm": 5.34838342666626, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.2126, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.5115483319076134, |
|
"eval_loss": 1.237959861755371, |
|
"eval_runtime": 9.9382, |
|
"eval_samples_per_second": 235.253, |
|
"eval_steps_per_second": 3.723, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 68.92307692307692, |
|
"eval_accuracy": 0.5859709153122327, |
|
"eval_loss": 0.9992711544036865, |
|
"eval_runtime": 10.002, |
|
"eval_samples_per_second": 233.754, |
|
"eval_steps_per_second": 3.699, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 69.23076923076923, |
|
"grad_norm": 3.1523709297180176, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.2176, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.5021385799828914, |
|
"eval_loss": 1.190958023071289, |
|
"eval_runtime": 9.7096, |
|
"eval_samples_per_second": 240.793, |
|
"eval_steps_per_second": 3.811, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 70.76923076923077, |
|
"grad_norm": 4.65359354019165, |
|
"learning_rate": 1.2962962962962962e-05, |
|
"loss": 0.2096, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 70.92307692307692, |
|
"eval_accuracy": 0.5119760479041916, |
|
"eval_loss": 1.246795415878296, |
|
"eval_runtime": 9.658, |
|
"eval_samples_per_second": 242.079, |
|
"eval_steps_per_second": 3.831, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.6920444824636441, |
|
"eval_loss": 0.7588455677032471, |
|
"eval_runtime": 10.2262, |
|
"eval_samples_per_second": 228.628, |
|
"eval_steps_per_second": 3.618, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 72.3076923076923, |
|
"grad_norm": 4.450184345245361, |
|
"learning_rate": 1.2037037037037037e-05, |
|
"loss": 0.2092, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 72.92307692307692, |
|
"eval_accuracy": 0.6308810949529512, |
|
"eval_loss": 0.900288999080658, |
|
"eval_runtime": 9.6458, |
|
"eval_samples_per_second": 242.386, |
|
"eval_steps_per_second": 3.836, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 73.84615384615384, |
|
"grad_norm": 4.230223178863525, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.1968, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.564585115483319, |
|
"eval_loss": 1.1697088479995728, |
|
"eval_runtime": 9.7325, |
|
"eval_samples_per_second": 240.225, |
|
"eval_steps_per_second": 3.802, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 74.92307692307692, |
|
"eval_accuracy": 0.6445680068434559, |
|
"eval_loss": 0.8789314031600952, |
|
"eval_runtime": 9.8672, |
|
"eval_samples_per_second": 236.946, |
|
"eval_steps_per_second": 3.75, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 75.38461538461539, |
|
"grad_norm": 5.635646343231201, |
|
"learning_rate": 1.0185185185185185e-05, |
|
"loss": 0.2027, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.5598802395209581, |
|
"eval_loss": 1.1352075338363647, |
|
"eval_runtime": 10.2152, |
|
"eval_samples_per_second": 228.875, |
|
"eval_steps_per_second": 3.622, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"grad_norm": 4.811977386474609, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.1965, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"eval_accuracy": 0.5598802395209581, |
|
"eval_loss": 1.083630919456482, |
|
"eval_runtime": 9.8228, |
|
"eval_samples_per_second": 238.017, |
|
"eval_steps_per_second": 3.767, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.5902480752780154, |
|
"eval_loss": 1.018804669380188, |
|
"eval_runtime": 10.2662, |
|
"eval_samples_per_second": 227.739, |
|
"eval_steps_per_second": 3.604, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 78.46153846153847, |
|
"grad_norm": 4.3275227546691895, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.2267, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 78.92307692307692, |
|
"eval_accuracy": 0.5975192472198461, |
|
"eval_loss": 1.0287189483642578, |
|
"eval_runtime": 10.2326, |
|
"eval_samples_per_second": 228.486, |
|
"eval_steps_per_second": 3.616, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 5.816257476806641, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.1967, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.6544054747647562, |
|
"eval_loss": 0.8465330004692078, |
|
"eval_runtime": 9.6333, |
|
"eval_samples_per_second": 242.7, |
|
"eval_steps_per_second": 3.841, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 80.92307692307692, |
|
"eval_accuracy": 0.5470487596236099, |
|
"eval_loss": 1.188087821006775, |
|
"eval_runtime": 9.8344, |
|
"eval_samples_per_second": 237.736, |
|
"eval_steps_per_second": 3.762, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 81.53846153846153, |
|
"grad_norm": 4.023173809051514, |
|
"learning_rate": 6.481481481481481e-06, |
|
"loss": 0.1842, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.5367835757057314, |
|
"eval_loss": 1.235166072845459, |
|
"eval_runtime": 9.8519, |
|
"eval_samples_per_second": 237.315, |
|
"eval_steps_per_second": 3.756, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 82.92307692307692, |
|
"eval_accuracy": 0.5701454234388366, |
|
"eval_loss": 1.106431007385254, |
|
"eval_runtime": 10.0863, |
|
"eval_samples_per_second": 231.799, |
|
"eval_steps_per_second": 3.668, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 83.07692307692308, |
|
"grad_norm": 5.945059299468994, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.1952, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.6608212147134302, |
|
"eval_loss": 0.8087576031684875, |
|
"eval_runtime": 9.3282, |
|
"eval_samples_per_second": 250.637, |
|
"eval_steps_per_second": 3.966, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 84.61538461538461, |
|
"grad_norm": 4.164029121398926, |
|
"learning_rate": 4.6296296296296296e-06, |
|
"loss": 0.1873, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 84.92307692307692, |
|
"eval_accuracy": 0.6086398631308811, |
|
"eval_loss": 0.9341749548912048, |
|
"eval_runtime": 9.6842, |
|
"eval_samples_per_second": 241.425, |
|
"eval_steps_per_second": 3.821, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.6056458511548332, |
|
"eval_loss": 0.9807350039482117, |
|
"eval_runtime": 9.6406, |
|
"eval_samples_per_second": 242.516, |
|
"eval_steps_per_second": 3.838, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 86.15384615384616, |
|
"grad_norm": 5.312713146209717, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.185, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 86.92307692307692, |
|
"eval_accuracy": 0.5898203592814372, |
|
"eval_loss": 1.0164724588394165, |
|
"eval_runtime": 9.7581, |
|
"eval_samples_per_second": 239.596, |
|
"eval_steps_per_second": 3.792, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 87.6923076923077, |
|
"grad_norm": 5.478633403778076, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 0.1993, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.5474764756201882, |
|
"eval_loss": 1.1511483192443848, |
|
"eval_runtime": 9.905, |
|
"eval_samples_per_second": 236.042, |
|
"eval_steps_per_second": 3.735, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 88.92307692307692, |
|
"eval_accuracy": 0.5406330196749358, |
|
"eval_loss": 1.176562786102295, |
|
"eval_runtime": 10.1535, |
|
"eval_samples_per_second": 230.266, |
|
"eval_steps_per_second": 3.644, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 89.23076923076923, |
|
"grad_norm": 3.2113797664642334, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"loss": 0.1707, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.5662959794696322, |
|
"eval_loss": 1.120088815689087, |
|
"eval_runtime": 9.7143, |
|
"eval_samples_per_second": 240.676, |
|
"eval_steps_per_second": 3.809, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 90.76923076923077, |
|
"grad_norm": 5.403631210327148, |
|
"learning_rate": 9.259259259259259e-07, |
|
"loss": 0.1852, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 90.92307692307692, |
|
"eval_accuracy": 0.5701454234388366, |
|
"eval_loss": 1.116164207458496, |
|
"eval_runtime": 10.0497, |
|
"eval_samples_per_second": 232.643, |
|
"eval_steps_per_second": 3.682, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.5680068434559452, |
|
"eval_loss": 1.1272914409637451, |
|
"eval_runtime": 9.9727, |
|
"eval_samples_per_second": 234.44, |
|
"eval_steps_per_second": 3.71, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"grad_norm": 4.397765159606934, |
|
"learning_rate": 0.0, |
|
"loss": 0.1904, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"eval_accuracy": 0.5680068434559452, |
|
"eval_loss": 1.1280397176742554, |
|
"eval_runtime": 10.1432, |
|
"eval_samples_per_second": 230.5, |
|
"eval_steps_per_second": 3.648, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"step": 600, |
|
"total_flos": 2.9138957540265e+18, |
|
"train_loss": 0.273075803120931, |
|
"train_runtime": 2186.3924, |
|
"train_samples_per_second": 73.774, |
|
"train_steps_per_second": 0.274 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.9138957540265e+18, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|