|
{ |
|
"best_metric": 1.1656934022903442, |
|
"best_model_checkpoint": "convnext-tiny-224-finetuned/checkpoint-160", |
|
"epoch": 18.823529411764707, |
|
"eval_steps": 500, |
|
"global_step": 160, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9411764705882353, |
|
"eval_accuracy": { |
|
"accuracy": 0.18382352941176472 |
|
}, |
|
"eval_logLoss": 1.6071832180023193, |
|
"eval_loss": 1.6071830987930298, |
|
"eval_runtime": 244.9974, |
|
"eval_samples_per_second": 1.11, |
|
"eval_steps_per_second": 0.037, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 1.1764705882352942, |
|
"grad_norm": 1.0974622964859009, |
|
"learning_rate": 3.125e-05, |
|
"loss": 1.6101, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": { |
|
"accuracy": 0.31985294117647056 |
|
}, |
|
"eval_logLoss": 1.5668261051177979, |
|
"eval_loss": 1.5668261051177979, |
|
"eval_runtime": 4.797, |
|
"eval_samples_per_second": 56.702, |
|
"eval_steps_per_second": 1.876, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 2.3529411764705883, |
|
"grad_norm": 1.2453666925430298, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 1.5645, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.9411764705882355, |
|
"eval_accuracy": { |
|
"accuracy": 0.33455882352941174 |
|
}, |
|
"eval_logLoss": 1.5246020555496216, |
|
"eval_loss": 1.5246021747589111, |
|
"eval_runtime": 4.7926, |
|
"eval_samples_per_second": 56.754, |
|
"eval_steps_per_second": 1.878, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 3.5294117647058822, |
|
"grad_norm": 1.36457097530365, |
|
"learning_rate": 4.5138888888888894e-05, |
|
"loss": 1.4902, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": { |
|
"accuracy": 0.4007352941176471 |
|
}, |
|
"eval_logLoss": 1.4773920774459839, |
|
"eval_loss": 1.4773920774459839, |
|
"eval_runtime": 4.9915, |
|
"eval_samples_per_second": 54.493, |
|
"eval_steps_per_second": 1.803, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 4.705882352941177, |
|
"grad_norm": 1.3080309629440308, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 1.4243, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 4.9411764705882355, |
|
"eval_accuracy": { |
|
"accuracy": 0.44485294117647056 |
|
}, |
|
"eval_logLoss": 1.4283257722854614, |
|
"eval_loss": 1.4283257722854614, |
|
"eval_runtime": 4.7862, |
|
"eval_samples_per_second": 56.831, |
|
"eval_steps_per_second": 1.88, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 5.882352941176471, |
|
"grad_norm": 1.7825312614440918, |
|
"learning_rate": 3.8194444444444444e-05, |
|
"loss": 1.3502, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": { |
|
"accuracy": 0.48161764705882354 |
|
}, |
|
"eval_logLoss": 1.3747049570083618, |
|
"eval_loss": 1.3747047185897827, |
|
"eval_runtime": 4.9735, |
|
"eval_samples_per_second": 54.69, |
|
"eval_steps_per_second": 1.81, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 6.9411764705882355, |
|
"eval_accuracy": { |
|
"accuracy": 0.48161764705882354 |
|
}, |
|
"eval_logLoss": 1.3332045078277588, |
|
"eval_loss": 1.3332043886184692, |
|
"eval_runtime": 4.9941, |
|
"eval_samples_per_second": 54.464, |
|
"eval_steps_per_second": 1.802, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 7.0588235294117645, |
|
"grad_norm": 1.616268515586853, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 1.2906, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": { |
|
"accuracy": 0.5036764705882353 |
|
}, |
|
"eval_logLoss": 1.2978487014770508, |
|
"eval_loss": 1.2978484630584717, |
|
"eval_runtime": 4.9213, |
|
"eval_samples_per_second": 55.27, |
|
"eval_steps_per_second": 1.829, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 8.235294117647058, |
|
"grad_norm": 1.5584542751312256, |
|
"learning_rate": 3.125e-05, |
|
"loss": 1.2371, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 8.941176470588236, |
|
"eval_accuracy": { |
|
"accuracy": 0.5147058823529411 |
|
}, |
|
"eval_logLoss": 1.2702081203460693, |
|
"eval_loss": 1.2702082395553589, |
|
"eval_runtime": 5.014, |
|
"eval_samples_per_second": 54.248, |
|
"eval_steps_per_second": 1.795, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 9.411764705882353, |
|
"grad_norm": 1.5939627885818481, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 1.1856, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": { |
|
"accuracy": 0.5404411764705882 |
|
}, |
|
"eval_logLoss": 1.243437647819519, |
|
"eval_loss": 1.2434377670288086, |
|
"eval_runtime": 4.901, |
|
"eval_samples_per_second": 55.499, |
|
"eval_steps_per_second": 1.836, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 10.588235294117647, |
|
"grad_norm": 1.362315058708191, |
|
"learning_rate": 2.4305555555555558e-05, |
|
"loss": 1.1506, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 10.941176470588236, |
|
"eval_accuracy": { |
|
"accuracy": 0.5477941176470589 |
|
}, |
|
"eval_logLoss": 1.2299580574035645, |
|
"eval_loss": 1.2299582958221436, |
|
"eval_runtime": 4.9331, |
|
"eval_samples_per_second": 55.138, |
|
"eval_steps_per_second": 1.824, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 11.764705882352942, |
|
"grad_norm": 1.4996492862701416, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 1.0987, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": { |
|
"accuracy": 0.5588235294117647 |
|
}, |
|
"eval_logLoss": 1.2087745666503906, |
|
"eval_loss": 1.2087748050689697, |
|
"eval_runtime": 4.8571, |
|
"eval_samples_per_second": 56.0, |
|
"eval_steps_per_second": 1.853, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 12.941176470588236, |
|
"grad_norm": 1.7451958656311035, |
|
"learning_rate": 1.736111111111111e-05, |
|
"loss": 1.0758, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 12.941176470588236, |
|
"eval_accuracy": { |
|
"accuracy": 0.5514705882352942 |
|
}, |
|
"eval_logLoss": 1.1948641538619995, |
|
"eval_loss": 1.19486403465271, |
|
"eval_runtime": 4.994, |
|
"eval_samples_per_second": 54.465, |
|
"eval_steps_per_second": 1.802, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": { |
|
"accuracy": 0.5588235294117647 |
|
}, |
|
"eval_logLoss": 1.1896252632141113, |
|
"eval_loss": 1.1896252632141113, |
|
"eval_runtime": 4.9288, |
|
"eval_samples_per_second": 55.186, |
|
"eval_steps_per_second": 1.826, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 14.117647058823529, |
|
"grad_norm": 1.9521229267120361, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 1.0483, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 14.941176470588236, |
|
"eval_accuracy": { |
|
"accuracy": 0.5698529411764706 |
|
}, |
|
"eval_logLoss": 1.177284598350525, |
|
"eval_loss": 1.177284598350525, |
|
"eval_runtime": 4.8123, |
|
"eval_samples_per_second": 56.521, |
|
"eval_steps_per_second": 1.87, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 15.294117647058824, |
|
"grad_norm": 1.6353868246078491, |
|
"learning_rate": 1.0416666666666668e-05, |
|
"loss": 1.0346, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": { |
|
"accuracy": 0.5735294117647058 |
|
}, |
|
"eval_logLoss": 1.1718555688858032, |
|
"eval_loss": 1.1718555688858032, |
|
"eval_runtime": 4.8108, |
|
"eval_samples_per_second": 56.539, |
|
"eval_steps_per_second": 1.871, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 16.470588235294116, |
|
"grad_norm": 1.8729331493377686, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 1.0215, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 16.941176470588236, |
|
"eval_accuracy": { |
|
"accuracy": 0.5698529411764706 |
|
}, |
|
"eval_logLoss": 1.170174241065979, |
|
"eval_loss": 1.1701741218566895, |
|
"eval_runtime": 4.747, |
|
"eval_samples_per_second": 57.299, |
|
"eval_steps_per_second": 1.896, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 17.647058823529413, |
|
"grad_norm": 2.009471893310547, |
|
"learning_rate": 3.4722222222222224e-06, |
|
"loss": 1.0177, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": { |
|
"accuracy": 0.5772058823529411 |
|
}, |
|
"eval_logLoss": 1.1665880680084229, |
|
"eval_loss": 1.1665880680084229, |
|
"eval_runtime": 4.8261, |
|
"eval_samples_per_second": 56.361, |
|
"eval_steps_per_second": 1.865, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 18.823529411764707, |
|
"grad_norm": 1.6420903205871582, |
|
"learning_rate": 0.0, |
|
"loss": 0.9956, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 18.823529411764707, |
|
"eval_accuracy": { |
|
"accuracy": 0.5808823529411765 |
|
}, |
|
"eval_logLoss": 1.1656935214996338, |
|
"eval_loss": 1.1656934022903442, |
|
"eval_runtime": 5.2789, |
|
"eval_samples_per_second": 51.526, |
|
"eval_steps_per_second": 1.705, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 18.823529411764707, |
|
"step": 160, |
|
"total_flos": 5.146567075115827e+17, |
|
"train_loss": 1.2247119784355163, |
|
"train_runtime": 1875.9294, |
|
"train_samples_per_second": 11.6, |
|
"train_steps_per_second": 0.085 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 160, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.146567075115827e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|