convnext-tiny-224-finetuned / trainer_state.json
karim155's picture
Training in progress, epoch 0
debea01 verified
raw
history blame
9.89 kB
{
"best_metric": 1.1656934022903442,
"best_model_checkpoint": "convnext-tiny-224-finetuned/checkpoint-160",
"epoch": 18.823529411764707,
"eval_steps": 500,
"global_step": 160,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9411764705882353,
"eval_accuracy": {
"accuracy": 0.18382352941176472
},
"eval_logLoss": 1.6071832180023193,
"eval_loss": 1.6071830987930298,
"eval_runtime": 244.9974,
"eval_samples_per_second": 1.11,
"eval_steps_per_second": 0.037,
"step": 8
},
{
"epoch": 1.1764705882352942,
"grad_norm": 1.0974622964859009,
"learning_rate": 3.125e-05,
"loss": 1.6101,
"step": 10
},
{
"epoch": 2.0,
"eval_accuracy": {
"accuracy": 0.31985294117647056
},
"eval_logLoss": 1.5668261051177979,
"eval_loss": 1.5668261051177979,
"eval_runtime": 4.797,
"eval_samples_per_second": 56.702,
"eval_steps_per_second": 1.876,
"step": 17
},
{
"epoch": 2.3529411764705883,
"grad_norm": 1.2453666925430298,
"learning_rate": 4.8611111111111115e-05,
"loss": 1.5645,
"step": 20
},
{
"epoch": 2.9411764705882355,
"eval_accuracy": {
"accuracy": 0.33455882352941174
},
"eval_logLoss": 1.5246020555496216,
"eval_loss": 1.5246021747589111,
"eval_runtime": 4.7926,
"eval_samples_per_second": 56.754,
"eval_steps_per_second": 1.878,
"step": 25
},
{
"epoch": 3.5294117647058822,
"grad_norm": 1.36457097530365,
"learning_rate": 4.5138888888888894e-05,
"loss": 1.4902,
"step": 30
},
{
"epoch": 4.0,
"eval_accuracy": {
"accuracy": 0.4007352941176471
},
"eval_logLoss": 1.4773920774459839,
"eval_loss": 1.4773920774459839,
"eval_runtime": 4.9915,
"eval_samples_per_second": 54.493,
"eval_steps_per_second": 1.803,
"step": 34
},
{
"epoch": 4.705882352941177,
"grad_norm": 1.3080309629440308,
"learning_rate": 4.166666666666667e-05,
"loss": 1.4243,
"step": 40
},
{
"epoch": 4.9411764705882355,
"eval_accuracy": {
"accuracy": 0.44485294117647056
},
"eval_logLoss": 1.4283257722854614,
"eval_loss": 1.4283257722854614,
"eval_runtime": 4.7862,
"eval_samples_per_second": 56.831,
"eval_steps_per_second": 1.88,
"step": 42
},
{
"epoch": 5.882352941176471,
"grad_norm": 1.7825312614440918,
"learning_rate": 3.8194444444444444e-05,
"loss": 1.3502,
"step": 50
},
{
"epoch": 6.0,
"eval_accuracy": {
"accuracy": 0.48161764705882354
},
"eval_logLoss": 1.3747049570083618,
"eval_loss": 1.3747047185897827,
"eval_runtime": 4.9735,
"eval_samples_per_second": 54.69,
"eval_steps_per_second": 1.81,
"step": 51
},
{
"epoch": 6.9411764705882355,
"eval_accuracy": {
"accuracy": 0.48161764705882354
},
"eval_logLoss": 1.3332045078277588,
"eval_loss": 1.3332043886184692,
"eval_runtime": 4.9941,
"eval_samples_per_second": 54.464,
"eval_steps_per_second": 1.802,
"step": 59
},
{
"epoch": 7.0588235294117645,
"grad_norm": 1.616268515586853,
"learning_rate": 3.472222222222222e-05,
"loss": 1.2906,
"step": 60
},
{
"epoch": 8.0,
"eval_accuracy": {
"accuracy": 0.5036764705882353
},
"eval_logLoss": 1.2978487014770508,
"eval_loss": 1.2978484630584717,
"eval_runtime": 4.9213,
"eval_samples_per_second": 55.27,
"eval_steps_per_second": 1.829,
"step": 68
},
{
"epoch": 8.235294117647058,
"grad_norm": 1.5584542751312256,
"learning_rate": 3.125e-05,
"loss": 1.2371,
"step": 70
},
{
"epoch": 8.941176470588236,
"eval_accuracy": {
"accuracy": 0.5147058823529411
},
"eval_logLoss": 1.2702081203460693,
"eval_loss": 1.2702082395553589,
"eval_runtime": 5.014,
"eval_samples_per_second": 54.248,
"eval_steps_per_second": 1.795,
"step": 76
},
{
"epoch": 9.411764705882353,
"grad_norm": 1.5939627885818481,
"learning_rate": 2.777777777777778e-05,
"loss": 1.1856,
"step": 80
},
{
"epoch": 10.0,
"eval_accuracy": {
"accuracy": 0.5404411764705882
},
"eval_logLoss": 1.243437647819519,
"eval_loss": 1.2434377670288086,
"eval_runtime": 4.901,
"eval_samples_per_second": 55.499,
"eval_steps_per_second": 1.836,
"step": 85
},
{
"epoch": 10.588235294117647,
"grad_norm": 1.362315058708191,
"learning_rate": 2.4305555555555558e-05,
"loss": 1.1506,
"step": 90
},
{
"epoch": 10.941176470588236,
"eval_accuracy": {
"accuracy": 0.5477941176470589
},
"eval_logLoss": 1.2299580574035645,
"eval_loss": 1.2299582958221436,
"eval_runtime": 4.9331,
"eval_samples_per_second": 55.138,
"eval_steps_per_second": 1.824,
"step": 93
},
{
"epoch": 11.764705882352942,
"grad_norm": 1.4996492862701416,
"learning_rate": 2.0833333333333336e-05,
"loss": 1.0987,
"step": 100
},
{
"epoch": 12.0,
"eval_accuracy": {
"accuracy": 0.5588235294117647
},
"eval_logLoss": 1.2087745666503906,
"eval_loss": 1.2087748050689697,
"eval_runtime": 4.8571,
"eval_samples_per_second": 56.0,
"eval_steps_per_second": 1.853,
"step": 102
},
{
"epoch": 12.941176470588236,
"grad_norm": 1.7451958656311035,
"learning_rate": 1.736111111111111e-05,
"loss": 1.0758,
"step": 110
},
{
"epoch": 12.941176470588236,
"eval_accuracy": {
"accuracy": 0.5514705882352942
},
"eval_logLoss": 1.1948641538619995,
"eval_loss": 1.19486403465271,
"eval_runtime": 4.994,
"eval_samples_per_second": 54.465,
"eval_steps_per_second": 1.802,
"step": 110
},
{
"epoch": 14.0,
"eval_accuracy": {
"accuracy": 0.5588235294117647
},
"eval_logLoss": 1.1896252632141113,
"eval_loss": 1.1896252632141113,
"eval_runtime": 4.9288,
"eval_samples_per_second": 55.186,
"eval_steps_per_second": 1.826,
"step": 119
},
{
"epoch": 14.117647058823529,
"grad_norm": 1.9521229267120361,
"learning_rate": 1.388888888888889e-05,
"loss": 1.0483,
"step": 120
},
{
"epoch": 14.941176470588236,
"eval_accuracy": {
"accuracy": 0.5698529411764706
},
"eval_logLoss": 1.177284598350525,
"eval_loss": 1.177284598350525,
"eval_runtime": 4.8123,
"eval_samples_per_second": 56.521,
"eval_steps_per_second": 1.87,
"step": 127
},
{
"epoch": 15.294117647058824,
"grad_norm": 1.6353868246078491,
"learning_rate": 1.0416666666666668e-05,
"loss": 1.0346,
"step": 130
},
{
"epoch": 16.0,
"eval_accuracy": {
"accuracy": 0.5735294117647058
},
"eval_logLoss": 1.1718555688858032,
"eval_loss": 1.1718555688858032,
"eval_runtime": 4.8108,
"eval_samples_per_second": 56.539,
"eval_steps_per_second": 1.871,
"step": 136
},
{
"epoch": 16.470588235294116,
"grad_norm": 1.8729331493377686,
"learning_rate": 6.944444444444445e-06,
"loss": 1.0215,
"step": 140
},
{
"epoch": 16.941176470588236,
"eval_accuracy": {
"accuracy": 0.5698529411764706
},
"eval_logLoss": 1.170174241065979,
"eval_loss": 1.1701741218566895,
"eval_runtime": 4.747,
"eval_samples_per_second": 57.299,
"eval_steps_per_second": 1.896,
"step": 144
},
{
"epoch": 17.647058823529413,
"grad_norm": 2.009471893310547,
"learning_rate": 3.4722222222222224e-06,
"loss": 1.0177,
"step": 150
},
{
"epoch": 18.0,
"eval_accuracy": {
"accuracy": 0.5772058823529411
},
"eval_logLoss": 1.1665880680084229,
"eval_loss": 1.1665880680084229,
"eval_runtime": 4.8261,
"eval_samples_per_second": 56.361,
"eval_steps_per_second": 1.865,
"step": 153
},
{
"epoch": 18.823529411764707,
"grad_norm": 1.6420903205871582,
"learning_rate": 0.0,
"loss": 0.9956,
"step": 160
},
{
"epoch": 18.823529411764707,
"eval_accuracy": {
"accuracy": 0.5808823529411765
},
"eval_logLoss": 1.1656935214996338,
"eval_loss": 1.1656934022903442,
"eval_runtime": 5.2789,
"eval_samples_per_second": 51.526,
"eval_steps_per_second": 1.705,
"step": 160
},
{
"epoch": 18.823529411764707,
"step": 160,
"total_flos": 5.146567075115827e+17,
"train_loss": 1.2247119784355163,
"train_runtime": 1875.9294,
"train_samples_per_second": 11.6,
"train_steps_per_second": 0.085
}
],
"logging_steps": 10,
"max_steps": 160,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.146567075115827e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}