{
  "best_metric": 0.8450704225352113,
  "best_model_checkpoint": "deit-base-distilled-patch16-224-65-fold4/checkpoint-182",
  "epoch": 92.3076923076923,
  "eval_steps": 500,
  "global_step": 300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9230769230769231,
      "eval_accuracy": 0.5211267605633803,
      "eval_loss": 0.7144731879234314,
      "eval_runtime": 1.0188,
      "eval_samples_per_second": 69.692,
      "eval_steps_per_second": 2.945,
      "step": 3
    },
    {
      "epoch": 1.8461538461538463,
      "eval_accuracy": 0.5070422535211268,
      "eval_loss": 0.7082186341285706,
      "eval_runtime": 0.9339,
      "eval_samples_per_second": 76.023,
      "eval_steps_per_second": 3.212,
      "step": 6
    },
    {
      "epoch": 2.769230769230769,
      "eval_accuracy": 0.6056338028169014,
      "eval_loss": 0.6889063715934753,
      "eval_runtime": 0.9191,
      "eval_samples_per_second": 77.245,
      "eval_steps_per_second": 3.264,
      "step": 9
    },
    {
      "epoch": 3.076923076923077,
      "grad_norm": 3.284947633743286,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.6878,
      "step": 10
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.6619718309859155,
      "eval_loss": 0.6703165173530579,
      "eval_runtime": 0.9363,
      "eval_samples_per_second": 75.83,
      "eval_steps_per_second": 3.204,
      "step": 13
    },
    {
      "epoch": 4.923076923076923,
      "eval_accuracy": 0.676056338028169,
      "eval_loss": 0.6556025743484497,
      "eval_runtime": 0.9344,
      "eval_samples_per_second": 75.984,
      "eval_steps_per_second": 3.211,
      "step": 16
    },
    {
      "epoch": 5.846153846153846,
      "eval_accuracy": 0.6619718309859155,
      "eval_loss": 0.642956554889679,
      "eval_runtime": 0.9474,
      "eval_samples_per_second": 74.941,
      "eval_steps_per_second": 3.167,
      "step": 19
    },
    {
      "epoch": 6.153846153846154,
      "grad_norm": 5.985997676849365,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.6203,
      "step": 20
    },
    {
      "epoch": 6.769230769230769,
      "eval_accuracy": 0.676056338028169,
      "eval_loss": 0.6249927282333374,
      "eval_runtime": 0.9539,
      "eval_samples_per_second": 74.435,
      "eval_steps_per_second": 3.145,
      "step": 22
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.6197183098591549,
      "eval_loss": 0.7463871240615845,
      "eval_runtime": 0.9471,
      "eval_samples_per_second": 74.963,
      "eval_steps_per_second": 3.167,
      "step": 26
    },
    {
      "epoch": 8.923076923076923,
      "eval_accuracy": 0.6056338028169014,
      "eval_loss": 0.6646994352340698,
      "eval_runtime": 0.9245,
      "eval_samples_per_second": 76.797,
      "eval_steps_per_second": 3.245,
      "step": 29
    },
    {
      "epoch": 9.23076923076923,
      "grad_norm": 7.882161617279053,
      "learning_rate": 5e-05,
      "loss": 0.5703,
      "step": 30
    },
    {
      "epoch": 9.846153846153847,
      "eval_accuracy": 0.704225352112676,
      "eval_loss": 0.6096706390380859,
      "eval_runtime": 0.9619,
      "eval_samples_per_second": 73.809,
      "eval_steps_per_second": 3.119,
      "step": 32
    },
    {
      "epoch": 10.76923076923077,
      "eval_accuracy": 0.6619718309859155,
      "eval_loss": 0.6261224746704102,
      "eval_runtime": 0.9396,
      "eval_samples_per_second": 75.56,
      "eval_steps_per_second": 3.193,
      "step": 35
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.704225352112676,
      "eval_loss": 0.5925809741020203,
      "eval_runtime": 0.9277,
      "eval_samples_per_second": 76.537,
      "eval_steps_per_second": 3.234,
      "step": 39
    },
    {
      "epoch": 12.307692307692308,
      "grad_norm": 5.672287940979004,
      "learning_rate": 4.814814814814815e-05,
      "loss": 0.5281,
      "step": 40
    },
    {
      "epoch": 12.923076923076923,
      "eval_accuracy": 0.7464788732394366,
      "eval_loss": 0.5370251536369324,
      "eval_runtime": 0.9419,
      "eval_samples_per_second": 75.378,
      "eval_steps_per_second": 3.185,
      "step": 42
    },
    {
      "epoch": 13.846153846153847,
      "eval_accuracy": 0.7464788732394366,
      "eval_loss": 0.5637533664703369,
      "eval_runtime": 0.9264,
      "eval_samples_per_second": 76.638,
      "eval_steps_per_second": 3.238,
      "step": 45
    },
    {
      "epoch": 14.76923076923077,
      "eval_accuracy": 0.6056338028169014,
      "eval_loss": 0.7175151109695435,
      "eval_runtime": 0.9684,
      "eval_samples_per_second": 73.32,
      "eval_steps_per_second": 3.098,
      "step": 48
    },
    {
      "epoch": 15.384615384615385,
      "grad_norm": 7.855106830596924,
      "learning_rate": 4.62962962962963e-05,
      "loss": 0.4616,
      "step": 50
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.5774647887323944,
      "eval_loss": 0.8917320966720581,
      "eval_runtime": 0.9446,
      "eval_samples_per_second": 75.161,
      "eval_steps_per_second": 3.176,
      "step": 52
    },
    {
      "epoch": 16.923076923076923,
      "eval_accuracy": 0.676056338028169,
      "eval_loss": 0.6760779023170471,
      "eval_runtime": 0.9228,
      "eval_samples_per_second": 76.943,
      "eval_steps_per_second": 3.251,
      "step": 55
    },
    {
      "epoch": 17.846153846153847,
      "eval_accuracy": 0.7323943661971831,
      "eval_loss": 0.5606212019920349,
      "eval_runtime": 0.9447,
      "eval_samples_per_second": 75.157,
      "eval_steps_per_second": 3.176,
      "step": 58
    },
    {
      "epoch": 18.46153846153846,
      "grad_norm": 5.475687503814697,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 0.4943,
      "step": 60
    },
    {
      "epoch": 18.76923076923077,
      "eval_accuracy": 0.6338028169014085,
      "eval_loss": 0.696312427520752,
      "eval_runtime": 0.9291,
      "eval_samples_per_second": 76.418,
      "eval_steps_per_second": 3.229,
      "step": 61
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.6619718309859155,
      "eval_loss": 0.6461552977561951,
      "eval_runtime": 0.9441,
      "eval_samples_per_second": 75.207,
      "eval_steps_per_second": 3.178,
      "step": 65
    },
    {
      "epoch": 20.923076923076923,
      "eval_accuracy": 0.7183098591549296,
      "eval_loss": 0.6246035695075989,
      "eval_runtime": 0.9435,
      "eval_samples_per_second": 75.251,
      "eval_steps_per_second": 3.18,
      "step": 68
    },
    {
      "epoch": 21.53846153846154,
      "grad_norm": 4.991897106170654,
      "learning_rate": 4.259259259259259e-05,
      "loss": 0.4058,
      "step": 70
    },
    {
      "epoch": 21.846153846153847,
      "eval_accuracy": 0.6619718309859155,
      "eval_loss": 0.7336124181747437,
      "eval_runtime": 0.9352,
      "eval_samples_per_second": 75.921,
      "eval_steps_per_second": 3.208,
      "step": 71
    },
    {
      "epoch": 22.76923076923077,
      "eval_accuracy": 0.7323943661971831,
      "eval_loss": 0.6270064115524292,
      "eval_runtime": 0.9384,
      "eval_samples_per_second": 75.658,
      "eval_steps_per_second": 3.197,
      "step": 74
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.7183098591549296,
      "eval_loss": 0.6096646189689636,
      "eval_runtime": 0.941,
      "eval_samples_per_second": 75.453,
      "eval_steps_per_second": 3.188,
      "step": 78
    },
    {
      "epoch": 24.615384615384617,
      "grad_norm": 4.380879878997803,
      "learning_rate": 4.074074074074074e-05,
      "loss": 0.3577,
      "step": 80
    },
    {
      "epoch": 24.923076923076923,
      "eval_accuracy": 0.7605633802816901,
      "eval_loss": 0.6699813008308411,
      "eval_runtime": 0.9378,
      "eval_samples_per_second": 75.708,
      "eval_steps_per_second": 3.199,
      "step": 81
    },
    {
      "epoch": 25.846153846153847,
      "eval_accuracy": 0.7183098591549296,
      "eval_loss": 0.6676053404808044,
      "eval_runtime": 0.9357,
      "eval_samples_per_second": 75.876,
      "eval_steps_per_second": 3.206,
      "step": 84
    },
    {
      "epoch": 26.76923076923077,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.5475191473960876,
      "eval_runtime": 0.9411,
      "eval_samples_per_second": 75.444,
      "eval_steps_per_second": 3.188,
      "step": 87
    },
    {
      "epoch": 27.692307692307693,
      "grad_norm": 2.8975579738616943,
      "learning_rate": 3.888888888888889e-05,
      "loss": 0.2988,
      "step": 90
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.5382919311523438,
      "eval_runtime": 0.9412,
      "eval_samples_per_second": 75.435,
      "eval_steps_per_second": 3.187,
      "step": 91
    },
    {
      "epoch": 28.923076923076923,
      "eval_accuracy": 0.7183098591549296,
      "eval_loss": 0.5533848404884338,
      "eval_runtime": 0.9288,
      "eval_samples_per_second": 76.444,
      "eval_steps_per_second": 3.23,
      "step": 94
    },
    {
      "epoch": 29.846153846153847,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.5842180252075195,
      "eval_runtime": 0.9277,
      "eval_samples_per_second": 76.533,
      "eval_steps_per_second": 3.234,
      "step": 97
    },
    {
      "epoch": 30.76923076923077,
      "grad_norm": 3.0538382530212402,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.2595,
      "step": 100
    },
    {
      "epoch": 30.76923076923077,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.5964645743370056,
      "eval_runtime": 0.9103,
      "eval_samples_per_second": 77.994,
      "eval_steps_per_second": 3.296,
      "step": 100
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.7605633802816901,
      "eval_loss": 0.6219843626022339,
      "eval_runtime": 0.9431,
      "eval_samples_per_second": 75.281,
      "eval_steps_per_second": 3.181,
      "step": 104
    },
    {
      "epoch": 32.92307692307692,
      "eval_accuracy": 0.7605633802816901,
      "eval_loss": 0.6026638150215149,
      "eval_runtime": 0.9477,
      "eval_samples_per_second": 74.916,
      "eval_steps_per_second": 3.165,
      "step": 107
    },
    {
      "epoch": 33.84615384615385,
      "grad_norm": 3.94685959815979,
      "learning_rate": 3.518518518518519e-05,
      "loss": 0.2422,
      "step": 110
    },
    {
      "epoch": 33.84615384615385,
      "eval_accuracy": 0.7183098591549296,
      "eval_loss": 0.6368895769119263,
      "eval_runtime": 0.9216,
      "eval_samples_per_second": 77.044,
      "eval_steps_per_second": 3.255,
      "step": 110
    },
    {
      "epoch": 34.76923076923077,
      "eval_accuracy": 0.7746478873239436,
      "eval_loss": 0.6033357381820679,
      "eval_runtime": 0.9401,
      "eval_samples_per_second": 75.526,
      "eval_steps_per_second": 3.191,
      "step": 113
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 0.7323943661971831,
      "eval_loss": 0.6912439465522766,
      "eval_runtime": 0.9337,
      "eval_samples_per_second": 76.043,
      "eval_steps_per_second": 3.213,
      "step": 117
    },
    {
      "epoch": 36.92307692307692,
      "grad_norm": 1.8390007019042969,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.1927,
      "step": 120
    },
    {
      "epoch": 36.92307692307692,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.6582470536231995,
      "eval_runtime": 0.9394,
      "eval_samples_per_second": 75.581,
      "eval_steps_per_second": 3.194,
      "step": 120
    },
    {
      "epoch": 37.84615384615385,
      "eval_accuracy": 0.7746478873239436,
      "eval_loss": 0.6320064067840576,
      "eval_runtime": 0.9433,
      "eval_samples_per_second": 75.271,
      "eval_steps_per_second": 3.18,
      "step": 123
    },
    {
      "epoch": 38.76923076923077,
      "eval_accuracy": 0.7605633802816901,
      "eval_loss": 0.753158450126648,
      "eval_runtime": 0.9223,
      "eval_samples_per_second": 76.983,
      "eval_steps_per_second": 3.253,
      "step": 126
    },
    {
      "epoch": 40.0,
      "grad_norm": 8.522945404052734,
      "learning_rate": 3.148148148148148e-05,
      "loss": 0.2399,
      "step": 130
    },
    {
      "epoch": 40.0,
      "eval_accuracy": 0.7605633802816901,
      "eval_loss": 0.7909286618232727,
      "eval_runtime": 0.9385,
      "eval_samples_per_second": 75.651,
      "eval_steps_per_second": 3.197,
      "step": 130
    },
    {
      "epoch": 40.92307692307692,
      "eval_accuracy": 0.7464788732394366,
      "eval_loss": 0.6808232665061951,
      "eval_runtime": 0.929,
      "eval_samples_per_second": 76.424,
      "eval_steps_per_second": 3.229,
      "step": 133
    },
    {
      "epoch": 41.84615384615385,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.5815951824188232,
      "eval_runtime": 0.9333,
      "eval_samples_per_second": 76.072,
      "eval_steps_per_second": 3.214,
      "step": 136
    },
    {
      "epoch": 42.76923076923077,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.5473891496658325,
      "eval_runtime": 0.9353,
      "eval_samples_per_second": 75.912,
      "eval_steps_per_second": 3.208,
      "step": 139
    },
    {
      "epoch": 43.07692307692308,
      "grad_norm": 3.9758527278900146,
      "learning_rate": 2.962962962962963e-05,
      "loss": 0.2218,
      "step": 140
    },
    {
      "epoch": 44.0,
      "eval_accuracy": 0.704225352112676,
      "eval_loss": 0.6309952139854431,
      "eval_runtime": 0.9488,
      "eval_samples_per_second": 74.835,
      "eval_steps_per_second": 3.162,
      "step": 143
    },
    {
      "epoch": 44.92307692307692,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.6452985405921936,
      "eval_runtime": 0.9466,
      "eval_samples_per_second": 75.001,
      "eval_steps_per_second": 3.169,
      "step": 146
    },
    {
      "epoch": 45.84615384615385,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.616996169090271,
      "eval_runtime": 0.934,
      "eval_samples_per_second": 76.018,
      "eval_steps_per_second": 3.212,
      "step": 149
    },
    {
      "epoch": 46.15384615384615,
      "grad_norm": 3.51767635345459,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.1817,
      "step": 150
    },
    {
      "epoch": 46.76923076923077,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.603383481502533,
      "eval_runtime": 0.9385,
      "eval_samples_per_second": 75.653,
      "eval_steps_per_second": 3.197,
      "step": 152
    },
    {
      "epoch": 48.0,
      "eval_accuracy": 0.8309859154929577,
      "eval_loss": 0.6349979043006897,
      "eval_runtime": 0.9472,
      "eval_samples_per_second": 74.957,
      "eval_steps_per_second": 3.167,
      "step": 156
    },
    {
      "epoch": 48.92307692307692,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.6026537418365479,
      "eval_runtime": 0.9329,
      "eval_samples_per_second": 76.103,
      "eval_steps_per_second": 3.216,
      "step": 159
    },
    {
      "epoch": 49.23076923076923,
      "grad_norm": 2.8314857482910156,
      "learning_rate": 2.5925925925925925e-05,
      "loss": 0.1483,
      "step": 160
    },
    {
      "epoch": 49.84615384615385,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.559914767742157,
      "eval_runtime": 0.9519,
      "eval_samples_per_second": 74.59,
      "eval_steps_per_second": 3.152,
      "step": 162
    },
    {
      "epoch": 50.76923076923077,
      "eval_accuracy": 0.8309859154929577,
      "eval_loss": 0.5817492604255676,
      "eval_runtime": 0.9277,
      "eval_samples_per_second": 76.537,
      "eval_steps_per_second": 3.234,
      "step": 165
    },
    {
      "epoch": 52.0,
      "eval_accuracy": 0.7746478873239436,
      "eval_loss": 0.6085928082466125,
      "eval_runtime": 0.9365,
      "eval_samples_per_second": 75.816,
      "eval_steps_per_second": 3.203,
      "step": 169
    },
    {
      "epoch": 52.30769230769231,
      "grad_norm": 4.343232154846191,
      "learning_rate": 2.4074074074074074e-05,
      "loss": 0.1668,
      "step": 170
    },
    {
      "epoch": 52.92307692307692,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.5743552446365356,
      "eval_runtime": 0.9412,
      "eval_samples_per_second": 75.439,
      "eval_steps_per_second": 3.188,
      "step": 172
    },
    {
      "epoch": 53.84615384615385,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.605885922908783,
      "eval_runtime": 0.9418,
      "eval_samples_per_second": 75.387,
      "eval_steps_per_second": 3.185,
      "step": 175
    },
    {
      "epoch": 54.76923076923077,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.6454694867134094,
      "eval_runtime": 0.9437,
      "eval_samples_per_second": 75.239,
      "eval_steps_per_second": 3.179,
      "step": 178
    },
    {
      "epoch": 55.38461538461539,
      "grad_norm": 4.7387166023254395,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 0.1372,
      "step": 180
    },
    {
      "epoch": 56.0,
      "eval_accuracy": 0.8450704225352113,
      "eval_loss": 0.5367295145988464,
      "eval_runtime": 0.9301,
      "eval_samples_per_second": 76.34,
      "eval_steps_per_second": 3.226,
      "step": 182
    },
    {
      "epoch": 56.92307692307692,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.5615168213844299,
      "eval_runtime": 0.9333,
      "eval_samples_per_second": 76.074,
      "eval_steps_per_second": 3.214,
      "step": 185
    },
    {
      "epoch": 57.84615384615385,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.6377549767494202,
      "eval_runtime": 0.9271,
      "eval_samples_per_second": 76.581,
      "eval_steps_per_second": 3.236,
      "step": 188
    },
    {
      "epoch": 58.46153846153846,
      "grad_norm": 3.1127660274505615,
      "learning_rate": 2.037037037037037e-05,
      "loss": 0.1485,
      "step": 190
    },
    {
      "epoch": 58.76923076923077,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.5687428116798401,
      "eval_runtime": 0.9342,
      "eval_samples_per_second": 75.999,
      "eval_steps_per_second": 3.211,
      "step": 191
    },
    {
      "epoch": 60.0,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.48974475264549255,
      "eval_runtime": 0.9499,
      "eval_samples_per_second": 74.743,
      "eval_steps_per_second": 3.158,
      "step": 195
    },
    {
      "epoch": 60.92307692307692,
      "eval_accuracy": 0.8450704225352113,
      "eval_loss": 0.43841129541397095,
      "eval_runtime": 0.9329,
      "eval_samples_per_second": 76.104,
      "eval_steps_per_second": 3.216,
      "step": 198
    },
    {
      "epoch": 61.53846153846154,
      "grad_norm": 2.5722365379333496,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.1426,
      "step": 200
    },
    {
      "epoch": 61.84615384615385,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.5086758732795715,
      "eval_runtime": 0.9371,
      "eval_samples_per_second": 75.766,
      "eval_steps_per_second": 3.201,
      "step": 201
    },
    {
      "epoch": 62.76923076923077,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.47570785880088806,
      "eval_runtime": 0.935,
      "eval_samples_per_second": 75.939,
      "eval_steps_per_second": 3.209,
      "step": 204
    },
    {
      "epoch": 64.0,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.4372512102127075,
      "eval_runtime": 0.9455,
      "eval_samples_per_second": 75.095,
      "eval_steps_per_second": 3.173,
      "step": 208
    },
    {
      "epoch": 64.61538461538461,
      "grad_norm": 3.0248568058013916,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.1333,
      "step": 210
    },
    {
      "epoch": 64.92307692307692,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.4512348473072052,
      "eval_runtime": 0.9662,
      "eval_samples_per_second": 73.486,
      "eval_steps_per_second": 3.105,
      "step": 211
    },
    {
      "epoch": 65.84615384615384,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.4619483947753906,
      "eval_runtime": 0.9227,
      "eval_samples_per_second": 76.949,
      "eval_steps_per_second": 3.251,
      "step": 214
    },
    {
      "epoch": 66.76923076923077,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.5519948601722717,
      "eval_runtime": 0.9387,
      "eval_samples_per_second": 75.634,
      "eval_steps_per_second": 3.196,
      "step": 217
    },
    {
      "epoch": 67.6923076923077,
      "grad_norm": 3.7715258598327637,
      "learning_rate": 1.4814814814814815e-05,
      "loss": 0.1306,
      "step": 220
    },
    {
      "epoch": 68.0,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.5160856246948242,
      "eval_runtime": 0.9414,
      "eval_samples_per_second": 75.417,
      "eval_steps_per_second": 3.187,
      "step": 221
    },
    {
      "epoch": 68.92307692307692,
      "eval_accuracy": 0.7605633802816901,
      "eval_loss": 0.5180492401123047,
      "eval_runtime": 0.9311,
      "eval_samples_per_second": 76.252,
      "eval_steps_per_second": 3.222,
      "step": 224
    },
    {
      "epoch": 69.84615384615384,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.5777754783630371,
      "eval_runtime": 0.9362,
      "eval_samples_per_second": 75.843,
      "eval_steps_per_second": 3.205,
      "step": 227
    },
    {
      "epoch": 70.76923076923077,
      "grad_norm": 2.9611454010009766,
      "learning_rate": 1.2962962962962962e-05,
      "loss": 0.1327,
      "step": 230
    },
    {
      "epoch": 70.76923076923077,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.593269407749176,
      "eval_runtime": 0.9395,
      "eval_samples_per_second": 75.57,
      "eval_steps_per_second": 3.193,
      "step": 230
    },
    {
      "epoch": 72.0,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.5222498178482056,
      "eval_runtime": 0.9354,
      "eval_samples_per_second": 75.903,
      "eval_steps_per_second": 3.207,
      "step": 234
    },
    {
      "epoch": 72.92307692307692,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.5103858709335327,
      "eval_runtime": 0.941,
      "eval_samples_per_second": 75.455,
      "eval_steps_per_second": 3.188,
      "step": 237
    },
    {
      "epoch": 73.84615384615384,
      "grad_norm": 2.8741390705108643,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 0.1171,
      "step": 240
    },
    {
      "epoch": 73.84615384615384,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.5024046301841736,
      "eval_runtime": 0.943,
      "eval_samples_per_second": 75.292,
      "eval_steps_per_second": 3.181,
      "step": 240
    },
    {
      "epoch": 74.76923076923077,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.50595623254776,
      "eval_runtime": 0.944,
      "eval_samples_per_second": 75.213,
      "eval_steps_per_second": 3.178,
      "step": 243
    },
    {
      "epoch": 76.0,
      "eval_accuracy": 0.7746478873239436,
      "eval_loss": 0.5267102718353271,
      "eval_runtime": 0.937,
      "eval_samples_per_second": 75.772,
      "eval_steps_per_second": 3.202,
      "step": 247
    },
    {
      "epoch": 76.92307692307692,
      "grad_norm": 3.9980766773223877,
      "learning_rate": 9.259259259259259e-06,
      "loss": 0.1227,
      "step": 250
    },
    {
      "epoch": 76.92307692307692,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.4774617552757263,
      "eval_runtime": 0.9456,
      "eval_samples_per_second": 75.082,
      "eval_steps_per_second": 3.172,
      "step": 250
    },
    {
      "epoch": 77.84615384615384,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.5019908547401428,
      "eval_runtime": 0.9328,
      "eval_samples_per_second": 76.116,
      "eval_steps_per_second": 3.216,
      "step": 253
    },
    {
      "epoch": 78.76923076923077,
      "eval_accuracy": 0.7605633802816901,
      "eval_loss": 0.5243141651153564,
      "eval_runtime": 0.9369,
      "eval_samples_per_second": 75.782,
      "eval_steps_per_second": 3.202,
      "step": 256
    },
    {
      "epoch": 80.0,
      "grad_norm": 5.15329647064209,
      "learning_rate": 7.4074074074074075e-06,
      "loss": 0.1304,
      "step": 260
    },
    {
      "epoch": 80.0,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.6195448637008667,
      "eval_runtime": 0.9685,
      "eval_samples_per_second": 73.307,
      "eval_steps_per_second": 3.097,
      "step": 260
    },
    {
      "epoch": 80.92307692307692,
      "eval_accuracy": 0.7605633802816901,
      "eval_loss": 0.5740300416946411,
      "eval_runtime": 0.9291,
      "eval_samples_per_second": 76.417,
      "eval_steps_per_second": 3.229,
      "step": 263
    },
    {
      "epoch": 81.84615384615384,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.5652071237564087,
      "eval_runtime": 0.9331,
      "eval_samples_per_second": 76.091,
      "eval_steps_per_second": 3.215,
      "step": 266
    },
    {
      "epoch": 82.76923076923077,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.5749574303627014,
      "eval_runtime": 0.9389,
      "eval_samples_per_second": 75.618,
      "eval_steps_per_second": 3.195,
      "step": 269
    },
    {
      "epoch": 83.07692307692308,
      "grad_norm": 2.7778007984161377,
      "learning_rate": 5.555555555555556e-06,
      "loss": 0.1152,
      "step": 270
    },
    {
      "epoch": 84.0,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.5829400420188904,
      "eval_runtime": 0.9675,
      "eval_samples_per_second": 73.381,
      "eval_steps_per_second": 3.101,
      "step": 273
    },
    {
      "epoch": 84.92307692307692,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.5853610038757324,
      "eval_runtime": 0.9581,
      "eval_samples_per_second": 74.104,
      "eval_steps_per_second": 3.131,
      "step": 276
    },
    {
      "epoch": 85.84615384615384,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.5853627324104309,
      "eval_runtime": 0.9385,
      "eval_samples_per_second": 75.649,
      "eval_steps_per_second": 3.196,
      "step": 279
    },
    {
      "epoch": 86.15384615384616,
      "grad_norm": 2.607625722885132,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 0.1069,
      "step": 280
    },
    {
      "epoch": 86.76923076923077,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.5825924277305603,
      "eval_runtime": 0.9377,
      "eval_samples_per_second": 75.72,
      "eval_steps_per_second": 3.199,
      "step": 282
    },
    {
      "epoch": 88.0,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.5838791728019714,
      "eval_runtime": 0.9668,
      "eval_samples_per_second": 73.437,
      "eval_steps_per_second": 3.103,
      "step": 286
    },
    {
      "epoch": 88.92307692307692,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.5792059898376465,
      "eval_runtime": 0.9421,
      "eval_samples_per_second": 75.36,
      "eval_steps_per_second": 3.184,
      "step": 289
    },
    {
      "epoch": 89.23076923076923,
      "grad_norm": 3.399799108505249,
      "learning_rate": 1.8518518518518519e-06,
      "loss": 0.122,
      "step": 290
    },
    {
      "epoch": 89.84615384615384,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.5755288600921631,
      "eval_runtime": 0.9561,
      "eval_samples_per_second": 74.263,
      "eval_steps_per_second": 3.138,
      "step": 292
    },
    {
      "epoch": 90.76923076923077,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.5751092433929443,
      "eval_runtime": 0.9263,
      "eval_samples_per_second": 76.65,
      "eval_steps_per_second": 3.239,
      "step": 295
    },
    {
      "epoch": 92.0,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.5747934579849243,
      "eval_runtime": 0.9473,
      "eval_samples_per_second": 74.952,
      "eval_steps_per_second": 3.167,
      "step": 299
    },
    {
      "epoch": 92.3076923076923,
      "grad_norm": 2.51318621635437,
      "learning_rate": 0.0,
      "loss": 0.1268,
      "step": 300
    },
    {
      "epoch": 92.3076923076923,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.5748143196105957,
      "eval_runtime": 0.9381,
      "eval_samples_per_second": 75.682,
      "eval_steps_per_second": 3.198,
      "step": 300
    },
    {
      "epoch": 92.3076923076923,
      "step": 300,
      "total_flos": 2.8402872494292173e+18,
      "train_loss": 0.25812069336573285,
      "train_runtime": 1594.5895,
      "train_samples_per_second": 24.897,
      "train_steps_per_second": 0.188
    },
    {
      "epoch": 92.3076923076923,
      "eval_accuracy": 0.8450704225352113,
      "eval_loss": 0.5367295145988464,
      "eval_runtime": 0.9601,
      "eval_samples_per_second": 73.951,
      "eval_steps_per_second": 3.125,
      "step": 300
    }
  ],
  "logging_steps": 10,
  "max_steps": 300,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 100,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.8402872494292173e+18,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}