{ "best_metric": 1.1656934022903442, "best_model_checkpoint": "convnext-tiny-224-finetuned/checkpoint-160", "epoch": 18.823529411764707, "eval_steps": 500, "global_step": 160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9411764705882353, "eval_accuracy": { "accuracy": 0.18382352941176472 }, "eval_logLoss": 1.6071832180023193, "eval_loss": 1.6071830987930298, "eval_runtime": 244.9974, "eval_samples_per_second": 1.11, "eval_steps_per_second": 0.037, "step": 8 }, { "epoch": 1.1764705882352942, "grad_norm": 1.0974622964859009, "learning_rate": 3.125e-05, "loss": 1.6101, "step": 10 }, { "epoch": 2.0, "eval_accuracy": { "accuracy": 0.31985294117647056 }, "eval_logLoss": 1.5668261051177979, "eval_loss": 1.5668261051177979, "eval_runtime": 4.797, "eval_samples_per_second": 56.702, "eval_steps_per_second": 1.876, "step": 17 }, { "epoch": 2.3529411764705883, "grad_norm": 1.2453666925430298, "learning_rate": 4.8611111111111115e-05, "loss": 1.5645, "step": 20 }, { "epoch": 2.9411764705882355, "eval_accuracy": { "accuracy": 0.33455882352941174 }, "eval_logLoss": 1.5246020555496216, "eval_loss": 1.5246021747589111, "eval_runtime": 4.7926, "eval_samples_per_second": 56.754, "eval_steps_per_second": 1.878, "step": 25 }, { "epoch": 3.5294117647058822, "grad_norm": 1.36457097530365, "learning_rate": 4.5138888888888894e-05, "loss": 1.4902, "step": 30 }, { "epoch": 4.0, "eval_accuracy": { "accuracy": 0.4007352941176471 }, "eval_logLoss": 1.4773920774459839, "eval_loss": 1.4773920774459839, "eval_runtime": 4.9915, "eval_samples_per_second": 54.493, "eval_steps_per_second": 1.803, "step": 34 }, { "epoch": 4.705882352941177, "grad_norm": 1.3080309629440308, "learning_rate": 4.166666666666667e-05, "loss": 1.4243, "step": 40 }, { "epoch": 4.9411764705882355, "eval_accuracy": { "accuracy": 0.44485294117647056 }, "eval_logLoss": 1.4283257722854614, "eval_loss": 1.4283257722854614, "eval_runtime": 4.7862, "eval_samples_per_second": 56.831, "eval_steps_per_second": 1.88, "step": 42 }, { "epoch": 5.882352941176471, "grad_norm": 1.7825312614440918, "learning_rate": 3.8194444444444444e-05, "loss": 1.3502, "step": 50 }, { "epoch": 6.0, "eval_accuracy": { "accuracy": 0.48161764705882354 }, "eval_logLoss": 1.3747049570083618, "eval_loss": 1.3747047185897827, "eval_runtime": 4.9735, "eval_samples_per_second": 54.69, "eval_steps_per_second": 1.81, "step": 51 }, { "epoch": 6.9411764705882355, "eval_accuracy": { "accuracy": 0.48161764705882354 }, "eval_logLoss": 1.3332045078277588, "eval_loss": 1.3332043886184692, "eval_runtime": 4.9941, "eval_samples_per_second": 54.464, "eval_steps_per_second": 1.802, "step": 59 }, { "epoch": 7.0588235294117645, "grad_norm": 1.616268515586853, "learning_rate": 3.472222222222222e-05, "loss": 1.2906, "step": 60 }, { "epoch": 8.0, "eval_accuracy": { "accuracy": 0.5036764705882353 }, "eval_logLoss": 1.2978487014770508, "eval_loss": 1.2978484630584717, "eval_runtime": 4.9213, "eval_samples_per_second": 55.27, "eval_steps_per_second": 1.829, "step": 68 }, { "epoch": 8.235294117647058, "grad_norm": 1.5584542751312256, "learning_rate": 3.125e-05, "loss": 1.2371, "step": 70 }, { "epoch": 8.941176470588236, "eval_accuracy": { "accuracy": 0.5147058823529411 }, "eval_logLoss": 1.2702081203460693, "eval_loss": 1.2702082395553589, "eval_runtime": 5.014, "eval_samples_per_second": 54.248, "eval_steps_per_second": 1.795, "step": 76 }, { "epoch": 9.411764705882353, "grad_norm": 1.5939627885818481, "learning_rate": 2.777777777777778e-05, "loss": 1.1856, "step": 80 }, { "epoch": 10.0, "eval_accuracy": { "accuracy": 0.5404411764705882 }, "eval_logLoss": 1.243437647819519, "eval_loss": 1.2434377670288086, "eval_runtime": 4.901, "eval_samples_per_second": 55.499, "eval_steps_per_second": 1.836, "step": 85 }, { "epoch": 10.588235294117647, "grad_norm": 1.362315058708191, "learning_rate": 2.4305555555555558e-05, "loss": 1.1506, "step": 90 }, { "epoch": 10.941176470588236, "eval_accuracy": { "accuracy": 0.5477941176470589 }, "eval_logLoss": 1.2299580574035645, "eval_loss": 1.2299582958221436, "eval_runtime": 4.9331, "eval_samples_per_second": 55.138, "eval_steps_per_second": 1.824, "step": 93 }, { "epoch": 11.764705882352942, "grad_norm": 1.4996492862701416, "learning_rate": 2.0833333333333336e-05, "loss": 1.0987, "step": 100 }, { "epoch": 12.0, "eval_accuracy": { "accuracy": 0.5588235294117647 }, "eval_logLoss": 1.2087745666503906, "eval_loss": 1.2087748050689697, "eval_runtime": 4.8571, "eval_samples_per_second": 56.0, "eval_steps_per_second": 1.853, "step": 102 }, { "epoch": 12.941176470588236, "grad_norm": 1.7451958656311035, "learning_rate": 1.736111111111111e-05, "loss": 1.0758, "step": 110 }, { "epoch": 12.941176470588236, "eval_accuracy": { "accuracy": 0.5514705882352942 }, "eval_logLoss": 1.1948641538619995, "eval_loss": 1.19486403465271, "eval_runtime": 4.994, "eval_samples_per_second": 54.465, "eval_steps_per_second": 1.802, "step": 110 }, { "epoch": 14.0, "eval_accuracy": { "accuracy": 0.5588235294117647 }, "eval_logLoss": 1.1896252632141113, "eval_loss": 1.1896252632141113, "eval_runtime": 4.9288, "eval_samples_per_second": 55.186, "eval_steps_per_second": 1.826, "step": 119 }, { "epoch": 14.117647058823529, "grad_norm": 1.9521229267120361, "learning_rate": 1.388888888888889e-05, "loss": 1.0483, "step": 120 }, { "epoch": 14.941176470588236, "eval_accuracy": { "accuracy": 0.5698529411764706 }, "eval_logLoss": 1.177284598350525, "eval_loss": 1.177284598350525, "eval_runtime": 4.8123, "eval_samples_per_second": 56.521, "eval_steps_per_second": 1.87, "step": 127 }, { "epoch": 15.294117647058824, "grad_norm": 1.6353868246078491, "learning_rate": 1.0416666666666668e-05, "loss": 1.0346, "step": 130 }, { "epoch": 16.0, "eval_accuracy": { "accuracy": 0.5735294117647058 }, "eval_logLoss": 1.1718555688858032, "eval_loss": 1.1718555688858032, "eval_runtime": 4.8108, "eval_samples_per_second": 56.539, "eval_steps_per_second": 1.871, "step": 136 }, { "epoch": 16.470588235294116, "grad_norm": 1.8729331493377686, "learning_rate": 6.944444444444445e-06, "loss": 1.0215, "step": 140 }, { "epoch": 16.941176470588236, "eval_accuracy": { "accuracy": 0.5698529411764706 }, "eval_logLoss": 1.170174241065979, "eval_loss": 1.1701741218566895, "eval_runtime": 4.747, "eval_samples_per_second": 57.299, "eval_steps_per_second": 1.896, "step": 144 }, { "epoch": 17.647058823529413, "grad_norm": 2.009471893310547, "learning_rate": 3.4722222222222224e-06, "loss": 1.0177, "step": 150 }, { "epoch": 18.0, "eval_accuracy": { "accuracy": 0.5772058823529411 }, "eval_logLoss": 1.1665880680084229, "eval_loss": 1.1665880680084229, "eval_runtime": 4.8261, "eval_samples_per_second": 56.361, "eval_steps_per_second": 1.865, "step": 153 }, { "epoch": 18.823529411764707, "grad_norm": 1.6420903205871582, "learning_rate": 0.0, "loss": 0.9956, "step": 160 }, { "epoch": 18.823529411764707, "eval_accuracy": { "accuracy": 0.5808823529411765 }, "eval_logLoss": 1.1656935214996338, "eval_loss": 1.1656934022903442, "eval_runtime": 5.2789, "eval_samples_per_second": 51.526, "eval_steps_per_second": 1.705, "step": 160 }, { "epoch": 18.823529411764707, "step": 160, "total_flos": 5.146567075115827e+17, "train_loss": 1.2247119784355163, "train_runtime": 1875.9294, "train_samples_per_second": 11.6, "train_steps_per_second": 0.085 } ], "logging_steps": 10, "max_steps": 160, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.146567075115827e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }