|
{
  "best_metric": 1.0196388959884644,
  "best_model_checkpoint": "convnext-tiny-224-finetuned/checkpoint-261",
  "epoch": 28.363636363636363,
  "eval_steps": 500,
  "global_step": 390,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.7272727272727273,
      "grad_norm": 0.9955764412879944,
      "learning_rate": 1.282051282051282e-05,
      "loss": 1.6135,
      "step": 10
    },
    {
      "epoch": 0.9454545454545454,
      "eval_accuracy": {
        "accuracy": 0.22323462414578588
      },
      "eval_logLoss": 1.5881173610687256,
      "eval_loss": 1.588117241859436,
      "eval_runtime": 405.8824,
      "eval_samples_per_second": 1.082,
      "eval_steps_per_second": 0.034,
      "step": 13
    },
    {
      "epoch": 1.4545454545454546,
      "grad_norm": 1.0506945848464966,
      "learning_rate": 2.564102564102564e-05,
      "loss": 1.5823,
      "step": 20
    },
    {
      "epoch": 1.9636363636363636,
      "eval_accuracy": {
        "accuracy": 0.35990888382687924
      },
      "eval_logLoss": 1.5302075147628784,
      "eval_loss": 1.5302073955535889,
      "eval_runtime": 6.3692,
      "eval_samples_per_second": 68.926,
      "eval_steps_per_second": 2.198,
      "step": 27
    },
    {
      "epoch": 2.1818181818181817,
      "grad_norm": 1.089643120765686,
      "learning_rate": 3.846153846153846e-05,
      "loss": 1.5485,
      "step": 30
    },
    {
      "epoch": 2.909090909090909,
      "grad_norm": 1.260988712310791,
      "learning_rate": 4.985754985754986e-05,
      "loss": 1.4988,
      "step": 40
    },
    {
      "epoch": 2.981818181818182,
      "eval_accuracy": {
        "accuracy": 0.4874715261958998
      },
      "eval_logLoss": 1.4480493068695068,
      "eval_loss": 1.4480493068695068,
      "eval_runtime": 6.4414,
      "eval_samples_per_second": 68.153,
      "eval_steps_per_second": 2.173,
      "step": 41
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 1.314483642578125,
      "learning_rate": 4.8433048433048433e-05,
      "loss": 1.4303,
      "step": 50
    },
    {
      "epoch": 4.0,
      "eval_accuracy": {
        "accuracy": 0.5034168564920274
      },
      "eval_logLoss": 1.3424228429794312,
      "eval_loss": 1.3424230813980103,
      "eval_runtime": 6.3233,
      "eval_samples_per_second": 69.426,
      "eval_steps_per_second": 2.214,
      "step": 55
    },
    {
      "epoch": 4.363636363636363,
      "grad_norm": 1.4022053480148315,
      "learning_rate": 4.700854700854701e-05,
      "loss": 1.3653,
      "step": 60
    },
    {
      "epoch": 4.945454545454545,
      "eval_accuracy": {
        "accuracy": 0.5239179954441914
      },
      "eval_logLoss": 1.2543689012527466,
      "eval_loss": 1.2543689012527466,
      "eval_runtime": 6.3884,
      "eval_samples_per_second": 68.718,
      "eval_steps_per_second": 2.191,
      "step": 68
    },
    {
      "epoch": 5.090909090909091,
      "grad_norm": 1.9723025560379028,
      "learning_rate": 4.558404558404559e-05,
      "loss": 1.2829,
      "step": 70
    },
    {
      "epoch": 5.818181818181818,
      "grad_norm": 1.580014944076538,
      "learning_rate": 4.415954415954416e-05,
      "loss": 1.2232,
      "step": 80
    },
    {
      "epoch": 5.963636363636364,
      "eval_accuracy": {
        "accuracy": 0.5239179954441914
      },
      "eval_logLoss": 1.1867280006408691,
      "eval_loss": 1.1867281198501587,
      "eval_runtime": 6.3056,
      "eval_samples_per_second": 69.621,
      "eval_steps_per_second": 2.22,
      "step": 82
    },
    {
      "epoch": 6.545454545454545,
      "grad_norm": 1.6402294635772705,
      "learning_rate": 4.2735042735042735e-05,
      "loss": 1.1734,
      "step": 90
    },
    {
      "epoch": 6.9818181818181815,
      "eval_accuracy": {
        "accuracy": 0.5466970387243736
      },
      "eval_logLoss": 1.1329878568649292,
      "eval_loss": 1.1329878568649292,
      "eval_runtime": 6.5296,
      "eval_samples_per_second": 67.232,
      "eval_steps_per_second": 2.144,
      "step": 96
    },
    {
      "epoch": 7.2727272727272725,
      "grad_norm": 2.135455846786499,
      "learning_rate": 4.131054131054131e-05,
      "loss": 1.146,
      "step": 100
    },
    {
      "epoch": 8.0,
      "grad_norm": 2.3969764709472656,
      "learning_rate": 3.988603988603989e-05,
      "loss": 1.0747,
      "step": 110
    },
    {
      "epoch": 8.0,
      "eval_accuracy": {
        "accuracy": 0.5626423690205011
      },
      "eval_logLoss": 1.1196777820587158,
      "eval_loss": 1.1196777820587158,
      "eval_runtime": 6.221,
      "eval_samples_per_second": 70.567,
      "eval_steps_per_second": 2.25,
      "step": 110
    },
    {
      "epoch": 8.727272727272727,
      "grad_norm": 2.5098071098327637,
      "learning_rate": 3.846153846153846e-05,
      "loss": 1.0405,
      "step": 120
    },
    {
      "epoch": 8.945454545454545,
      "eval_accuracy": {
        "accuracy": 0.5535307517084282
      },
      "eval_logLoss": 1.0871223211288452,
      "eval_loss": 1.0871223211288452,
      "eval_runtime": 6.225,
      "eval_samples_per_second": 70.522,
      "eval_steps_per_second": 2.249,
      "step": 123
    },
    {
      "epoch": 9.454545454545455,
      "grad_norm": 2.5964548587799072,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 1.0313,
      "step": 130
    },
    {
      "epoch": 9.963636363636363,
      "eval_accuracy": {
        "accuracy": 0.5671981776765376
      },
      "eval_logLoss": 1.0899914503097534,
      "eval_loss": 1.0899913311004639,
      "eval_runtime": 6.2747,
      "eval_samples_per_second": 69.964,
      "eval_steps_per_second": 2.231,
      "step": 137
    },
    {
      "epoch": 10.181818181818182,
      "grad_norm": 3.431312084197998,
      "learning_rate": 3.561253561253561e-05,
      "loss": 0.9867,
      "step": 140
    },
    {
      "epoch": 10.909090909090908,
      "grad_norm": 1.9337528944015503,
      "learning_rate": 3.418803418803419e-05,
      "loss": 0.959,
      "step": 150
    },
    {
      "epoch": 10.981818181818182,
      "eval_accuracy": {
        "accuracy": 0.5603644646924829
      },
      "eval_logLoss": 1.0766180753707886,
      "eval_loss": 1.0766180753707886,
      "eval_runtime": 6.3132,
      "eval_samples_per_second": 69.537,
      "eval_steps_per_second": 2.218,
      "step": 151
    },
    {
      "epoch": 11.636363636363637,
      "grad_norm": 2.0123398303985596,
      "learning_rate": 3.2763532763532764e-05,
      "loss": 0.9314,
      "step": 160
    },
    {
      "epoch": 12.0,
      "eval_accuracy": {
        "accuracy": 0.5603644646924829
      },
      "eval_logLoss": 1.0608227252960205,
      "eval_loss": 1.0608227252960205,
      "eval_runtime": 6.2643,
      "eval_samples_per_second": 70.079,
      "eval_steps_per_second": 2.235,
      "step": 165
    },
    {
      "epoch": 12.363636363636363,
      "grad_norm": 2.423022508621216,
      "learning_rate": 3.133903133903134e-05,
      "loss": 0.9102,
      "step": 170
    },
    {
      "epoch": 12.945454545454545,
      "eval_accuracy": {
        "accuracy": 0.5649202733485194
      },
      "eval_logLoss": 1.0388399362564087,
      "eval_loss": 1.0388399362564087,
      "eval_runtime": 6.2456,
      "eval_samples_per_second": 70.289,
      "eval_steps_per_second": 2.242,
      "step": 178
    },
    {
      "epoch": 13.090909090909092,
      "grad_norm": 2.3961944580078125,
      "learning_rate": 2.9914529914529915e-05,
      "loss": 0.8825,
      "step": 180
    },
    {
      "epoch": 13.818181818181818,
      "grad_norm": 2.1838817596435547,
      "learning_rate": 2.8490028490028492e-05,
      "loss": 0.8437,
      "step": 190
    },
    {
      "epoch": 13.963636363636363,
      "eval_accuracy": {
        "accuracy": 0.5785876993166287
      },
      "eval_logLoss": 1.0331932306289673,
      "eval_loss": 1.0331931114196777,
      "eval_runtime": 6.3139,
      "eval_samples_per_second": 69.529,
      "eval_steps_per_second": 2.217,
      "step": 192
    },
    {
      "epoch": 14.545454545454545,
      "grad_norm": 2.22560453414917,
      "learning_rate": 2.706552706552707e-05,
      "loss": 0.8234,
      "step": 200
    },
    {
      "epoch": 14.981818181818182,
      "eval_accuracy": {
        "accuracy": 0.5763097949886105
      },
      "eval_logLoss": 1.030157446861267,
      "eval_loss": 1.0301573276519775,
      "eval_runtime": 6.2242,
      "eval_samples_per_second": 70.531,
      "eval_steps_per_second": 2.249,
      "step": 206
    },
    {
      "epoch": 15.272727272727273,
      "grad_norm": 2.565896987915039,
      "learning_rate": 2.564102564102564e-05,
      "loss": 0.7888,
      "step": 210
    },
    {
      "epoch": 16.0,
      "grad_norm": 4.106865882873535,
      "learning_rate": 2.4216524216524217e-05,
      "loss": 0.7883,
      "step": 220
    },
    {
      "epoch": 16.0,
      "eval_accuracy": {
        "accuracy": 0.5649202733485194
      },
      "eval_logLoss": 1.0276356935501099,
      "eval_loss": 1.0276355743408203,
      "eval_runtime": 6.221,
      "eval_samples_per_second": 70.568,
      "eval_steps_per_second": 2.25,
      "step": 220
    },
    {
      "epoch": 16.727272727272727,
      "grad_norm": 2.70666766166687,
      "learning_rate": 2.2792022792022794e-05,
      "loss": 0.7364,
      "step": 230
    },
    {
      "epoch": 16.945454545454545,
      "eval_accuracy": {
        "accuracy": 0.5649202733485194
      },
      "eval_logLoss": 1.0277541875839233,
      "eval_loss": 1.0277544260025024,
      "eval_runtime": 6.2998,
      "eval_samples_per_second": 69.685,
      "eval_steps_per_second": 2.222,
      "step": 233
    },
    {
      "epoch": 17.454545454545453,
      "grad_norm": 3.2285561561584473,
      "learning_rate": 2.1367521367521368e-05,
      "loss": 0.7561,
      "step": 240
    },
    {
      "epoch": 17.963636363636365,
      "eval_accuracy": {
        "accuracy": 0.5649202733485194
      },
      "eval_logLoss": 1.0257948637008667,
      "eval_loss": 1.0257947444915771,
      "eval_runtime": 6.223,
      "eval_samples_per_second": 70.544,
      "eval_steps_per_second": 2.25,
      "step": 247
    },
    {
      "epoch": 18.181818181818183,
      "grad_norm": 3.2319438457489014,
      "learning_rate": 1.9943019943019945e-05,
      "loss": 0.7162,
      "step": 250
    },
    {
      "epoch": 18.90909090909091,
      "grad_norm": 3.1322107315063477,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.7062,
      "step": 260
    },
    {
      "epoch": 18.98181818181818,
      "eval_accuracy": {
        "accuracy": 0.5694760820045558
      },
      "eval_logLoss": 1.019639015197754,
      "eval_loss": 1.0196388959884644,
      "eval_runtime": 6.1923,
      "eval_samples_per_second": 70.894,
      "eval_steps_per_second": 2.261,
      "step": 261
    },
    {
      "epoch": 19.636363636363637,
      "grad_norm": 2.8413918018341064,
      "learning_rate": 1.7094017094017095e-05,
      "loss": 0.6897,
      "step": 270
    },
    {
      "epoch": 20.0,
      "eval_accuracy": {
        "accuracy": 0.5558086560364465
      },
      "eval_logLoss": 1.0308281183242798,
      "eval_loss": 1.0308281183242798,
      "eval_runtime": 6.1943,
      "eval_samples_per_second": 70.871,
      "eval_steps_per_second": 2.26,
      "step": 275
    },
    {
      "epoch": 20.363636363636363,
      "grad_norm": 2.9819412231445312,
      "learning_rate": 1.566951566951567e-05,
      "loss": 0.6511,
      "step": 280
    },
    {
      "epoch": 20.945454545454545,
      "eval_accuracy": {
        "accuracy": 0.5626423690205011
      },
      "eval_logLoss": 1.0247496366500854,
      "eval_loss": 1.0247496366500854,
      "eval_runtime": 6.2512,
      "eval_samples_per_second": 70.226,
      "eval_steps_per_second": 2.24,
      "step": 288
    },
    {
      "epoch": 21.09090909090909,
      "grad_norm": 4.308482646942139,
      "learning_rate": 1.4245014245014246e-05,
      "loss": 0.6456,
      "step": 290
    },
    {
      "epoch": 21.818181818181817,
      "grad_norm": 3.1177256107330322,
      "learning_rate": 1.282051282051282e-05,
      "loss": 0.6338,
      "step": 300
    },
    {
      "epoch": 21.963636363636365,
      "eval_accuracy": {
        "accuracy": 0.5603644646924829
      },
      "eval_logLoss": 1.0310317277908325,
      "eval_loss": 1.031031608581543,
      "eval_runtime": 6.1732,
      "eval_samples_per_second": 71.114,
      "eval_steps_per_second": 2.268,
      "step": 302
    },
    {
      "epoch": 22.545454545454547,
      "grad_norm": 2.941620111465454,
      "learning_rate": 1.1396011396011397e-05,
      "loss": 0.619,
      "step": 310
    },
    {
      "epoch": 22.98181818181818,
      "eval_accuracy": {
        "accuracy": 0.5671981776765376
      },
      "eval_logLoss": 1.0257664918899536,
      "eval_loss": 1.025766372680664,
      "eval_runtime": 6.1895,
      "eval_samples_per_second": 70.927,
      "eval_steps_per_second": 2.262,
      "step": 316
    },
    {
      "epoch": 23.272727272727273,
      "grad_norm": 3.034752368927002,
      "learning_rate": 9.971509971509972e-06,
      "loss": 0.6187,
      "step": 320
    },
    {
      "epoch": 24.0,
      "grad_norm": 3.183673620223999,
      "learning_rate": 8.547008547008548e-06,
      "loss": 0.6008,
      "step": 330
    },
    {
      "epoch": 24.0,
      "eval_accuracy": {
        "accuracy": 0.5626423690205011
      },
      "eval_logLoss": 1.0298672914505005,
      "eval_loss": 1.029867172241211,
      "eval_runtime": 6.3628,
      "eval_samples_per_second": 68.995,
      "eval_steps_per_second": 2.2,
      "step": 330
    },
    {
      "epoch": 24.727272727272727,
      "grad_norm": 2.810622215270996,
      "learning_rate": 7.122507122507123e-06,
      "loss": 0.601,
      "step": 340
    },
    {
      "epoch": 24.945454545454545,
      "eval_accuracy": {
        "accuracy": 0.5671981776765376
      },
      "eval_logLoss": 1.0329437255859375,
      "eval_loss": 1.0329439640045166,
      "eval_runtime": 6.2697,
      "eval_samples_per_second": 70.019,
      "eval_steps_per_second": 2.233,
      "step": 343
    },
    {
      "epoch": 25.454545454545453,
      "grad_norm": 4.746636867523193,
      "learning_rate": 5.6980056980056985e-06,
      "loss": 0.595,
      "step": 350
    },
    {
      "epoch": 25.963636363636365,
      "eval_accuracy": {
        "accuracy": 0.5694760820045558
      },
      "eval_logLoss": 1.0277411937713623,
      "eval_loss": 1.0277411937713623,
      "eval_runtime": 6.3782,
      "eval_samples_per_second": 68.828,
      "eval_steps_per_second": 2.195,
      "step": 357
    },
    {
      "epoch": 26.181818181818183,
      "grad_norm": 3.6217236518859863,
      "learning_rate": 4.273504273504274e-06,
      "loss": 0.5713,
      "step": 360
    },
    {
      "epoch": 26.90909090909091,
      "grad_norm": 4.607295989990234,
      "learning_rate": 2.8490028490028492e-06,
      "loss": 0.598,
      "step": 370
    },
    {
      "epoch": 26.98181818181818,
      "eval_accuracy": {
        "accuracy": 0.5671981776765376
      },
      "eval_logLoss": 1.0288209915161133,
      "eval_loss": 1.0288212299346924,
      "eval_runtime": 6.3179,
      "eval_samples_per_second": 69.485,
      "eval_steps_per_second": 2.216,
      "step": 371
    },
    {
      "epoch": 27.636363636363637,
      "grad_norm": 3.8291075229644775,
      "learning_rate": 1.4245014245014246e-06,
      "loss": 0.5771,
      "step": 380
    },
    {
      "epoch": 28.0,
      "eval_accuracy": {
        "accuracy": 0.5603644646924829
      },
      "eval_logLoss": 1.031100869178772,
      "eval_loss": 1.0311009883880615,
      "eval_runtime": 6.2861,
      "eval_samples_per_second": 69.836,
      "eval_steps_per_second": 2.227,
      "step": 385
    },
    {
      "epoch": 28.363636363636363,
      "grad_norm": 3.588949203491211,
      "learning_rate": 0.0,
      "loss": 0.5829,
      "step": 390
    },
    {
      "epoch": 28.363636363636363,
      "eval_accuracy": {
        "accuracy": 0.5626423690205011
      },
      "eval_logLoss": 1.0311228036880493,
      "eval_loss": 1.0311226844787598,
      "eval_runtime": 6.7799,
      "eval_samples_per_second": 64.75,
      "eval_steps_per_second": 2.065,
      "step": 390
    },
    {
      "epoch": 28.363636363636363,
      "step": 390,
      "total_flos": 1.248846666821075e+18,
      "train_loss": 0.9185584948613094,
      "train_runtime": 3358.32,
      "train_samples_per_second": 15.651,
      "train_steps_per_second": 0.116
    }
  ],
  "logging_steps": 10,
  "max_steps": 390,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.248846666821075e+18,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}
|
|