{ "best_metric": 1.0196388959884644, "best_model_checkpoint": "convnext-tiny-224-finetuned/checkpoint-261", "epoch": 28.363636363636363, "eval_steps": 500, "global_step": 390, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7272727272727273, "grad_norm": 0.9955764412879944, "learning_rate": 1.282051282051282e-05, "loss": 1.6135, "step": 10 }, { "epoch": 0.9454545454545454, "eval_accuracy": { "accuracy": 0.22323462414578588 }, "eval_logLoss": 1.5881173610687256, "eval_loss": 1.588117241859436, "eval_runtime": 405.8824, "eval_samples_per_second": 1.082, "eval_steps_per_second": 0.034, "step": 13 }, { "epoch": 1.4545454545454546, "grad_norm": 1.0506945848464966, "learning_rate": 2.564102564102564e-05, "loss": 1.5823, "step": 20 }, { "epoch": 1.9636363636363636, "eval_accuracy": { "accuracy": 0.35990888382687924 }, "eval_logLoss": 1.5302075147628784, "eval_loss": 1.5302073955535889, "eval_runtime": 6.3692, "eval_samples_per_second": 68.926, "eval_steps_per_second": 2.198, "step": 27 }, { "epoch": 2.1818181818181817, "grad_norm": 1.089643120765686, "learning_rate": 3.846153846153846e-05, "loss": 1.5485, "step": 30 }, { "epoch": 2.909090909090909, "grad_norm": 1.260988712310791, "learning_rate": 4.985754985754986e-05, "loss": 1.4988, "step": 40 }, { "epoch": 2.981818181818182, "eval_accuracy": { "accuracy": 0.4874715261958998 }, "eval_logLoss": 1.4480493068695068, "eval_loss": 1.4480493068695068, "eval_runtime": 6.4414, "eval_samples_per_second": 68.153, "eval_steps_per_second": 2.173, "step": 41 }, { "epoch": 3.6363636363636362, "grad_norm": 1.314483642578125, "learning_rate": 4.8433048433048433e-05, "loss": 1.4303, "step": 50 }, { "epoch": 4.0, "eval_accuracy": { "accuracy": 0.5034168564920274 }, "eval_logLoss": 1.3424228429794312, "eval_loss": 1.3424230813980103, "eval_runtime": 6.3233, "eval_samples_per_second": 69.426, "eval_steps_per_second": 2.214, "step": 55 }, { "epoch": 4.363636363636363, "grad_norm": 1.4022053480148315, "learning_rate": 4.700854700854701e-05, "loss": 1.3653, "step": 60 }, { "epoch": 4.945454545454545, "eval_accuracy": { "accuracy": 0.5239179954441914 }, "eval_logLoss": 1.2543689012527466, "eval_loss": 1.2543689012527466, "eval_runtime": 6.3884, "eval_samples_per_second": 68.718, "eval_steps_per_second": 2.191, "step": 68 }, { "epoch": 5.090909090909091, "grad_norm": 1.9723025560379028, "learning_rate": 4.558404558404559e-05, "loss": 1.2829, "step": 70 }, { "epoch": 5.818181818181818, "grad_norm": 1.580014944076538, "learning_rate": 4.415954415954416e-05, "loss": 1.2232, "step": 80 }, { "epoch": 5.963636363636364, "eval_accuracy": { "accuracy": 0.5239179954441914 }, "eval_logLoss": 1.1867280006408691, "eval_loss": 1.1867281198501587, "eval_runtime": 6.3056, "eval_samples_per_second": 69.621, "eval_steps_per_second": 2.22, "step": 82 }, { "epoch": 6.545454545454545, "grad_norm": 1.6402294635772705, "learning_rate": 4.2735042735042735e-05, "loss": 1.1734, "step": 90 }, { "epoch": 6.9818181818181815, "eval_accuracy": { "accuracy": 0.5466970387243736 }, "eval_logLoss": 1.1329878568649292, "eval_loss": 1.1329878568649292, "eval_runtime": 6.5296, "eval_samples_per_second": 67.232, "eval_steps_per_second": 2.144, "step": 96 }, { "epoch": 7.2727272727272725, "grad_norm": 2.135455846786499, "learning_rate": 4.131054131054131e-05, "loss": 1.146, "step": 100 }, { "epoch": 8.0, "grad_norm": 2.3969764709472656, "learning_rate": 3.988603988603989e-05, "loss": 1.0747, "step": 110 }, { "epoch": 8.0, "eval_accuracy": { "accuracy": 0.5626423690205011 }, "eval_logLoss": 1.1196777820587158, "eval_loss": 1.1196777820587158, "eval_runtime": 6.221, "eval_samples_per_second": 70.567, "eval_steps_per_second": 2.25, "step": 110 }, { "epoch": 8.727272727272727, "grad_norm": 2.5098071098327637, "learning_rate": 3.846153846153846e-05, "loss": 1.0405, "step": 120 }, { "epoch": 8.945454545454545, "eval_accuracy": { "accuracy": 0.5535307517084282 }, "eval_logLoss": 1.0871223211288452, "eval_loss": 1.0871223211288452, "eval_runtime": 6.225, "eval_samples_per_second": 70.522, "eval_steps_per_second": 2.249, "step": 123 }, { "epoch": 9.454545454545455, "grad_norm": 2.5964548587799072, "learning_rate": 3.7037037037037037e-05, "loss": 1.0313, "step": 130 }, { "epoch": 9.963636363636363, "eval_accuracy": { "accuracy": 0.5671981776765376 }, "eval_logLoss": 1.0899914503097534, "eval_loss": 1.0899913311004639, "eval_runtime": 6.2747, "eval_samples_per_second": 69.964, "eval_steps_per_second": 2.231, "step": 137 }, { "epoch": 10.181818181818182, "grad_norm": 3.431312084197998, "learning_rate": 3.561253561253561e-05, "loss": 0.9867, "step": 140 }, { "epoch": 10.909090909090908, "grad_norm": 1.9337528944015503, "learning_rate": 3.418803418803419e-05, "loss": 0.959, "step": 150 }, { "epoch": 10.981818181818182, "eval_accuracy": { "accuracy": 0.5603644646924829 }, "eval_logLoss": 1.0766180753707886, "eval_loss": 1.0766180753707886, "eval_runtime": 6.3132, "eval_samples_per_second": 69.537, "eval_steps_per_second": 2.218, "step": 151 }, { "epoch": 11.636363636363637, "grad_norm": 2.0123398303985596, "learning_rate": 3.2763532763532764e-05, "loss": 0.9314, "step": 160 }, { "epoch": 12.0, "eval_accuracy": { "accuracy": 0.5603644646924829 }, "eval_logLoss": 1.0608227252960205, "eval_loss": 1.0608227252960205, "eval_runtime": 6.2643, "eval_samples_per_second": 70.079, "eval_steps_per_second": 2.235, "step": 165 }, { "epoch": 12.363636363636363, "grad_norm": 2.423022508621216, "learning_rate": 3.133903133903134e-05, "loss": 0.9102, "step": 170 }, { "epoch": 12.945454545454545, "eval_accuracy": { "accuracy": 0.5649202733485194 }, "eval_logLoss": 1.0388399362564087, "eval_loss": 1.0388399362564087, "eval_runtime": 6.2456, "eval_samples_per_second": 70.289, "eval_steps_per_second": 2.242, "step": 178 }, { "epoch": 13.090909090909092, "grad_norm": 2.3961944580078125, "learning_rate": 2.9914529914529915e-05, "loss": 0.8825, "step": 180 }, { "epoch": 13.818181818181818, "grad_norm": 2.1838817596435547, "learning_rate": 2.8490028490028492e-05, "loss": 0.8437, "step": 190 }, { "epoch": 13.963636363636363, "eval_accuracy": { "accuracy": 0.5785876993166287 }, "eval_logLoss": 1.0331932306289673, "eval_loss": 1.0331931114196777, "eval_runtime": 6.3139, "eval_samples_per_second": 69.529, "eval_steps_per_second": 2.217, "step": 192 }, { "epoch": 14.545454545454545, "grad_norm": 2.22560453414917, "learning_rate": 2.706552706552707e-05, "loss": 0.8234, "step": 200 }, { "epoch": 14.981818181818182, "eval_accuracy": { "accuracy": 0.5763097949886105 }, "eval_logLoss": 1.030157446861267, "eval_loss": 1.0301573276519775, "eval_runtime": 6.2242, "eval_samples_per_second": 70.531, "eval_steps_per_second": 2.249, "step": 206 }, { "epoch": 15.272727272727273, "grad_norm": 2.565896987915039, "learning_rate": 2.564102564102564e-05, "loss": 0.7888, "step": 210 }, { "epoch": 16.0, "grad_norm": 4.106865882873535, "learning_rate": 2.4216524216524217e-05, "loss": 0.7883, "step": 220 }, { "epoch": 16.0, "eval_accuracy": { "accuracy": 0.5649202733485194 }, "eval_logLoss": 1.0276356935501099, "eval_loss": 1.0276355743408203, "eval_runtime": 6.221, "eval_samples_per_second": 70.568, "eval_steps_per_second": 2.25, "step": 220 }, { "epoch": 16.727272727272727, "grad_norm": 2.70666766166687, "learning_rate": 2.2792022792022794e-05, "loss": 0.7364, "step": 230 }, { "epoch": 16.945454545454545, "eval_accuracy": { "accuracy": 0.5649202733485194 }, "eval_logLoss": 1.0277541875839233, "eval_loss": 1.0277544260025024, "eval_runtime": 6.2998, "eval_samples_per_second": 69.685, "eval_steps_per_second": 2.222, "step": 233 }, { "epoch": 17.454545454545453, "grad_norm": 3.2285561561584473, "learning_rate": 2.1367521367521368e-05, "loss": 0.7561, "step": 240 }, { "epoch": 17.963636363636365, "eval_accuracy": { "accuracy": 0.5649202733485194 }, "eval_logLoss": 1.0257948637008667, "eval_loss": 1.0257947444915771, "eval_runtime": 6.223, "eval_samples_per_second": 70.544, "eval_steps_per_second": 2.25, "step": 247 }, { "epoch": 18.181818181818183, "grad_norm": 3.2319438457489014, "learning_rate": 1.9943019943019945e-05, "loss": 0.7162, "step": 250 }, { "epoch": 18.90909090909091, "grad_norm": 3.1322107315063477, "learning_rate": 1.8518518518518518e-05, "loss": 0.7062, "step": 260 }, { "epoch": 18.98181818181818, "eval_accuracy": { "accuracy": 0.5694760820045558 }, "eval_logLoss": 1.019639015197754, "eval_loss": 1.0196388959884644, "eval_runtime": 6.1923, "eval_samples_per_second": 70.894, "eval_steps_per_second": 2.261, "step": 261 }, { "epoch": 19.636363636363637, "grad_norm": 2.8413918018341064, "learning_rate": 1.7094017094017095e-05, "loss": 0.6897, "step": 270 }, { "epoch": 20.0, "eval_accuracy": { "accuracy": 0.5558086560364465 }, "eval_logLoss": 1.0308281183242798, "eval_loss": 1.0308281183242798, "eval_runtime": 6.1943, "eval_samples_per_second": 70.871, "eval_steps_per_second": 2.26, "step": 275 }, { "epoch": 20.363636363636363, "grad_norm": 2.9819412231445312, "learning_rate": 1.566951566951567e-05, "loss": 0.6511, "step": 280 }, { "epoch": 20.945454545454545, "eval_accuracy": { "accuracy": 0.5626423690205011 }, "eval_logLoss": 1.0247496366500854, "eval_loss": 1.0247496366500854, "eval_runtime": 6.2512, "eval_samples_per_second": 70.226, "eval_steps_per_second": 2.24, "step": 288 }, { "epoch": 21.09090909090909, "grad_norm": 4.308482646942139, "learning_rate": 1.4245014245014246e-05, "loss": 0.6456, "step": 290 }, { "epoch": 21.818181818181817, "grad_norm": 3.1177256107330322, "learning_rate": 1.282051282051282e-05, "loss": 0.6338, "step": 300 }, { "epoch": 21.963636363636365, "eval_accuracy": { "accuracy": 0.5603644646924829 }, "eval_logLoss": 1.0310317277908325, "eval_loss": 1.031031608581543, "eval_runtime": 6.1732, "eval_samples_per_second": 71.114, "eval_steps_per_second": 2.268, "step": 302 }, { "epoch": 22.545454545454547, "grad_norm": 2.941620111465454, "learning_rate": 1.1396011396011397e-05, "loss": 0.619, "step": 310 }, { "epoch": 22.98181818181818, "eval_accuracy": { "accuracy": 0.5671981776765376 }, "eval_logLoss": 1.0257664918899536, "eval_loss": 1.025766372680664, "eval_runtime": 6.1895, "eval_samples_per_second": 70.927, "eval_steps_per_second": 2.262, "step": 316 }, { "epoch": 23.272727272727273, "grad_norm": 3.034752368927002, "learning_rate": 9.971509971509972e-06, "loss": 0.6187, "step": 320 }, { "epoch": 24.0, "grad_norm": 3.183673620223999, "learning_rate": 8.547008547008548e-06, "loss": 0.6008, "step": 330 }, { "epoch": 24.0, "eval_accuracy": { "accuracy": 0.5626423690205011 }, "eval_logLoss": 1.0298672914505005, "eval_loss": 1.029867172241211, "eval_runtime": 6.3628, "eval_samples_per_second": 68.995, "eval_steps_per_second": 2.2, "step": 330 }, { "epoch": 24.727272727272727, "grad_norm": 2.810622215270996, "learning_rate": 7.122507122507123e-06, "loss": 0.601, "step": 340 }, { "epoch": 24.945454545454545, "eval_accuracy": { "accuracy": 0.5671981776765376 }, "eval_logLoss": 1.0329437255859375, "eval_loss": 1.0329439640045166, "eval_runtime": 6.2697, "eval_samples_per_second": 70.019, "eval_steps_per_second": 2.233, "step": 343 }, { "epoch": 25.454545454545453, "grad_norm": 4.746636867523193, "learning_rate": 5.6980056980056985e-06, "loss": 0.595, "step": 350 }, { "epoch": 25.963636363636365, "eval_accuracy": { "accuracy": 0.5694760820045558 }, "eval_logLoss": 1.0277411937713623, "eval_loss": 1.0277411937713623, "eval_runtime": 6.3782, "eval_samples_per_second": 68.828, "eval_steps_per_second": 2.195, "step": 357 }, { "epoch": 26.181818181818183, "grad_norm": 3.6217236518859863, "learning_rate": 4.273504273504274e-06, "loss": 0.5713, "step": 360 }, { "epoch": 26.90909090909091, "grad_norm": 4.607295989990234, "learning_rate": 2.8490028490028492e-06, "loss": 0.598, "step": 370 }, { "epoch": 26.98181818181818, "eval_accuracy": { "accuracy": 0.5671981776765376 }, "eval_logLoss": 1.0288209915161133, "eval_loss": 1.0288212299346924, "eval_runtime": 6.3179, "eval_samples_per_second": 69.485, "eval_steps_per_second": 2.216, "step": 371 }, { "epoch": 27.636363636363637, "grad_norm": 3.8291075229644775, "learning_rate": 1.4245014245014246e-06, "loss": 0.5771, "step": 380 }, { "epoch": 28.0, "eval_accuracy": { "accuracy": 0.5603644646924829 }, "eval_logLoss": 1.031100869178772, "eval_loss": 1.0311009883880615, "eval_runtime": 6.2861, "eval_samples_per_second": 69.836, "eval_steps_per_second": 2.227, "step": 385 }, { "epoch": 28.363636363636363, "grad_norm": 3.588949203491211, "learning_rate": 0.0, "loss": 0.5829, "step": 390 }, { "epoch": 28.363636363636363, "eval_accuracy": { "accuracy": 0.5626423690205011 }, "eval_logLoss": 1.0311228036880493, "eval_loss": 1.0311226844787598, "eval_runtime": 6.7799, "eval_samples_per_second": 64.75, "eval_steps_per_second": 2.065, "step": 390 }, { "epoch": 28.363636363636363, "step": 390, "total_flos": 1.248846666821075e+18, "train_loss": 0.9185584948613094, "train_runtime": 3358.32, "train_samples_per_second": 15.651, "train_steps_per_second": 0.116 } ], "logging_steps": 10, "max_steps": 390, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.248846666821075e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }