|
{ |
|
"best_global_step": 392, |
|
"best_metric": 0.9479166666666666, |
|
"best_model_checkpoint": "BxSD_RGBCROP_Aug16F-8B16F-GACWDlr/checkpoint-392", |
|
"epoch": 11.025112107623318, |
|
"eval_steps": 500, |
|
"global_step": 672, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004484304932735426, |
|
"grad_norm": 3.1429996490478516, |
|
"learning_rate": 4.0358744394618834e-07, |
|
"loss": 0.7032, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.008968609865470852, |
|
"grad_norm": 3.9541077613830566, |
|
"learning_rate": 8.520179372197309e-07, |
|
"loss": 0.6956, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.013452914798206279, |
|
"grad_norm": 3.382608413696289, |
|
"learning_rate": 1.3004484304932734e-06, |
|
"loss": 0.7003, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.017937219730941704, |
|
"grad_norm": 3.3949358463287354, |
|
"learning_rate": 1.7488789237668164e-06, |
|
"loss": 0.6888, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02242152466367713, |
|
"grad_norm": 2.802041530609131, |
|
"learning_rate": 2.197309417040359e-06, |
|
"loss": 0.663, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.025112107623318385, |
|
"eval_accuracy": 0.6354166666666666, |
|
"eval_loss": 0.6602393984794617, |
|
"eval_runtime": 30.0703, |
|
"eval_samples_per_second": 6.385, |
|
"eval_steps_per_second": 0.798, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 1.0017937219730941, |
|
"grad_norm": 3.6131398677825928, |
|
"learning_rate": 2.6457399103139016e-06, |
|
"loss": 0.6442, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.0062780269058296, |
|
"grad_norm": 3.0706064701080322, |
|
"learning_rate": 3.0941704035874443e-06, |
|
"loss": 0.5936, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.010762331838565, |
|
"grad_norm": 3.5030055046081543, |
|
"learning_rate": 3.542600896860987e-06, |
|
"loss": 0.5627, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.0152466367713004, |
|
"grad_norm": 4.468491077423096, |
|
"learning_rate": 3.991031390134529e-06, |
|
"loss": 0.5351, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0197309417040359, |
|
"grad_norm": 3.892331600189209, |
|
"learning_rate": 4.439461883408072e-06, |
|
"loss": 0.4513, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0242152466367713, |
|
"grad_norm": 4.7978291511535645, |
|
"learning_rate": 4.887892376681615e-06, |
|
"loss": 0.4419, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0251121076233183, |
|
"eval_accuracy": 0.8125, |
|
"eval_loss": 0.4702857434749603, |
|
"eval_runtime": 29.1485, |
|
"eval_samples_per_second": 6.587, |
|
"eval_steps_per_second": 0.823, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 2.0035874439461883, |
|
"grad_norm": 2.970027208328247, |
|
"learning_rate": 5.3363228699551574e-06, |
|
"loss": 0.3586, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.008071748878924, |
|
"grad_norm": 4.3814697265625, |
|
"learning_rate": 5.7847533632287e-06, |
|
"loss": 0.2856, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.012556053811659, |
|
"grad_norm": 3.017439603805542, |
|
"learning_rate": 6.233183856502243e-06, |
|
"loss": 0.2565, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.017040358744395, |
|
"grad_norm": 7.53488302230835, |
|
"learning_rate": 6.681614349775786e-06, |
|
"loss": 0.2532, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.02152466367713, |
|
"grad_norm": 1.6428829431533813, |
|
"learning_rate": 7.1300448430493275e-06, |
|
"loss": 0.1752, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.0251121076233183, |
|
"eval_accuracy": 0.890625, |
|
"eval_loss": 0.3134443759918213, |
|
"eval_runtime": 28.705, |
|
"eval_samples_per_second": 6.689, |
|
"eval_steps_per_second": 0.836, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 3.000896860986547, |
|
"grad_norm": 1.969995379447937, |
|
"learning_rate": 7.57847533632287e-06, |
|
"loss": 0.1334, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.0053811659192826, |
|
"grad_norm": 1.8544121980667114, |
|
"learning_rate": 8.026905829596413e-06, |
|
"loss": 0.0963, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.009865470852018, |
|
"grad_norm": 3.637763023376465, |
|
"learning_rate": 8.475336322869956e-06, |
|
"loss": 0.062, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.0143497757847535, |
|
"grad_norm": 1.3532997369766235, |
|
"learning_rate": 8.923766816143498e-06, |
|
"loss": 0.0643, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.0188340807174887, |
|
"grad_norm": 0.3547247052192688, |
|
"learning_rate": 9.372197309417041e-06, |
|
"loss": 0.0566, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.0233183856502244, |
|
"grad_norm": 6.522655487060547, |
|
"learning_rate": 9.820627802690584e-06, |
|
"loss": 0.0656, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.0251121076233183, |
|
"eval_accuracy": 0.9375, |
|
"eval_loss": 0.2310737818479538, |
|
"eval_runtime": 29.4697, |
|
"eval_samples_per_second": 6.515, |
|
"eval_steps_per_second": 0.814, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 4.002690582959641, |
|
"grad_norm": 0.4656350910663605, |
|
"learning_rate": 9.970104633781765e-06, |
|
"loss": 0.0137, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 4.0071748878923765, |
|
"grad_norm": 0.16051346063613892, |
|
"learning_rate": 9.920279023418038e-06, |
|
"loss": 0.0085, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.011659192825112, |
|
"grad_norm": 0.23789650201797485, |
|
"learning_rate": 9.87045341305431e-06, |
|
"loss": 0.0135, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.016143497757848, |
|
"grad_norm": 0.908409595489502, |
|
"learning_rate": 9.820627802690584e-06, |
|
"loss": 0.0069, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.020627802690583, |
|
"grad_norm": 0.11391621083021164, |
|
"learning_rate": 9.770802192326857e-06, |
|
"loss": 0.0106, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.025112107623318, |
|
"grad_norm": 0.08297699689865112, |
|
"learning_rate": 9.72097658196313e-06, |
|
"loss": 0.0046, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.025112107623318, |
|
"eval_accuracy": 0.9427083333333334, |
|
"eval_loss": 0.28078916668891907, |
|
"eval_runtime": 29.5695, |
|
"eval_samples_per_second": 6.493, |
|
"eval_steps_per_second": 0.812, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 5.004484304932736, |
|
"grad_norm": 0.06622310727834702, |
|
"learning_rate": 9.671150971599403e-06, |
|
"loss": 0.0044, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 5.00896860986547, |
|
"grad_norm": 0.0407695509493351, |
|
"learning_rate": 9.621325361235676e-06, |
|
"loss": 0.0022, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.013452914798206, |
|
"grad_norm": 0.039162054657936096, |
|
"learning_rate": 9.571499750871948e-06, |
|
"loss": 0.0021, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 5.017937219730942, |
|
"grad_norm": 0.03691912814974785, |
|
"learning_rate": 9.521674140508222e-06, |
|
"loss": 0.0016, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.022421524663677, |
|
"grad_norm": 0.02548583783209324, |
|
"learning_rate": 9.471848530144496e-06, |
|
"loss": 0.0012, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.025112107623318, |
|
"eval_accuracy": 0.9427083333333334, |
|
"eval_loss": 0.2592454254627228, |
|
"eval_runtime": 29.1301, |
|
"eval_samples_per_second": 6.591, |
|
"eval_steps_per_second": 0.824, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 6.001793721973094, |
|
"grad_norm": 0.022539280354976654, |
|
"learning_rate": 9.422022919780767e-06, |
|
"loss": 0.0009, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 6.00627802690583, |
|
"grad_norm": 0.024405544623732567, |
|
"learning_rate": 9.372197309417041e-06, |
|
"loss": 0.001, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.010762331838565, |
|
"grad_norm": 0.020192157477140427, |
|
"learning_rate": 9.322371699053315e-06, |
|
"loss": 0.0008, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 6.0152466367713, |
|
"grad_norm": 0.01488098967820406, |
|
"learning_rate": 9.272546088689587e-06, |
|
"loss": 0.0008, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 6.019730941704036, |
|
"grad_norm": 0.033892132341861725, |
|
"learning_rate": 9.22272047832586e-06, |
|
"loss": 0.0009, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 6.024215246636771, |
|
"grad_norm": 0.01757296919822693, |
|
"learning_rate": 9.172894867962134e-06, |
|
"loss": 0.0007, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.025112107623318, |
|
"eval_accuracy": 0.9479166666666666, |
|
"eval_loss": 0.2576257288455963, |
|
"eval_runtime": 29.5146, |
|
"eval_samples_per_second": 6.505, |
|
"eval_steps_per_second": 0.813, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 7.003587443946189, |
|
"grad_norm": 0.013150567188858986, |
|
"learning_rate": 9.123069257598406e-06, |
|
"loss": 0.0007, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.0080717488789235, |
|
"grad_norm": 0.015239037573337555, |
|
"learning_rate": 9.07324364723468e-06, |
|
"loss": 0.0006, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 7.012556053811659, |
|
"grad_norm": 0.018098143860697746, |
|
"learning_rate": 9.023418036870953e-06, |
|
"loss": 0.0006, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 7.017040358744395, |
|
"grad_norm": 0.010625138878822327, |
|
"learning_rate": 8.973592426507225e-06, |
|
"loss": 0.0005, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 7.0215246636771305, |
|
"grad_norm": 0.014908026903867722, |
|
"learning_rate": 8.923766816143498e-06, |
|
"loss": 0.0005, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.025112107623318, |
|
"eval_accuracy": 0.9479166666666666, |
|
"eval_loss": 0.2630920112133026, |
|
"eval_runtime": 28.1464, |
|
"eval_samples_per_second": 6.821, |
|
"eval_steps_per_second": 0.853, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 8.000896860986547, |
|
"grad_norm": 0.014149523340165615, |
|
"learning_rate": 8.873941205779772e-06, |
|
"loss": 0.0005, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 8.005381165919282, |
|
"grad_norm": 0.013675632886588573, |
|
"learning_rate": 8.824115595416045e-06, |
|
"loss": 0.0005, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 8.009865470852018, |
|
"grad_norm": 0.021204130724072456, |
|
"learning_rate": 8.774289985052317e-06, |
|
"loss": 0.0005, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 8.014349775784753, |
|
"grad_norm": 0.01068932469934225, |
|
"learning_rate": 8.724464374688591e-06, |
|
"loss": 0.0004, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 8.01883408071749, |
|
"grad_norm": 0.011664162389934063, |
|
"learning_rate": 8.674638764324865e-06, |
|
"loss": 0.0004, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 8.023318385650224, |
|
"grad_norm": 0.015364346094429493, |
|
"learning_rate": 8.624813153961136e-06, |
|
"loss": 0.0004, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.025112107623318, |
|
"eval_accuracy": 0.9479166666666666, |
|
"eval_loss": 0.2744747996330261, |
|
"eval_runtime": 30.499, |
|
"eval_samples_per_second": 6.295, |
|
"eval_steps_per_second": 0.787, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 9.00269058295964, |
|
"grad_norm": 0.009640182368457317, |
|
"learning_rate": 8.57498754359741e-06, |
|
"loss": 0.0004, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 9.007174887892377, |
|
"grad_norm": 0.008297263644635677, |
|
"learning_rate": 8.525161933233684e-06, |
|
"loss": 0.0004, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 9.011659192825112, |
|
"grad_norm": 0.00965182390064001, |
|
"learning_rate": 8.475336322869956e-06, |
|
"loss": 0.0004, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 9.016143497757847, |
|
"grad_norm": 0.008539954200387001, |
|
"learning_rate": 8.42551071250623e-06, |
|
"loss": 0.0003, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 9.020627802690584, |
|
"grad_norm": 0.01318803709000349, |
|
"learning_rate": 8.375685102142503e-06, |
|
"loss": 0.0003, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 9.025112107623318, |
|
"grad_norm": 0.013389434665441513, |
|
"learning_rate": 8.325859491778775e-06, |
|
"loss": 0.0004, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 9.025112107623318, |
|
"eval_accuracy": 0.9479166666666666, |
|
"eval_loss": 0.28136080503463745, |
|
"eval_runtime": 28.6966, |
|
"eval_samples_per_second": 6.691, |
|
"eval_steps_per_second": 0.836, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 10.004484304932735, |
|
"grad_norm": 0.0122822942212224, |
|
"learning_rate": 8.276033881415048e-06, |
|
"loss": 0.0003, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 10.008968609865471, |
|
"grad_norm": 0.030896110460162163, |
|
"learning_rate": 8.226208271051322e-06, |
|
"loss": 0.0003, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 10.013452914798206, |
|
"grad_norm": 0.012824727222323418, |
|
"learning_rate": 8.176382660687594e-06, |
|
"loss": 0.0003, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 10.01793721973094, |
|
"grad_norm": 0.00790180079638958, |
|
"learning_rate": 8.126557050323867e-06, |
|
"loss": 0.0003, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.022421524663677, |
|
"grad_norm": 0.011255297809839249, |
|
"learning_rate": 8.076731439960141e-06, |
|
"loss": 0.0003, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 10.025112107623318, |
|
"eval_accuracy": 0.9479166666666666, |
|
"eval_loss": 0.286301851272583, |
|
"eval_runtime": 30.4742, |
|
"eval_samples_per_second": 6.3, |
|
"eval_steps_per_second": 0.788, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 11.001793721973094, |
|
"grad_norm": 0.009772414341568947, |
|
"learning_rate": 8.026905829596413e-06, |
|
"loss": 0.0003, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 11.00627802690583, |
|
"grad_norm": 0.006887427996844053, |
|
"learning_rate": 7.977080219232686e-06, |
|
"loss": 0.0003, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 11.010762331838565, |
|
"grad_norm": 0.0069274078123271465, |
|
"learning_rate": 7.92725460886896e-06, |
|
"loss": 0.0003, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 11.0152466367713, |
|
"grad_norm": 0.010375886224210262, |
|
"learning_rate": 7.877428998505232e-06, |
|
"loss": 0.0003, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 11.019730941704037, |
|
"grad_norm": 0.006271818652749062, |
|
"learning_rate": 7.827603388141506e-06, |
|
"loss": 0.0002, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 11.024215246636771, |
|
"grad_norm": 0.006681330036371946, |
|
"learning_rate": 7.77777777777778e-06, |
|
"loss": 0.0002, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 11.025112107623318, |
|
"eval_accuracy": 0.9479166666666666, |
|
"eval_loss": 0.28504976630210876, |
|
"eval_runtime": 30.651, |
|
"eval_samples_per_second": 6.264, |
|
"eval_steps_per_second": 0.783, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 11.025112107623318, |
|
"step": 672, |
|
"total_flos": 1.336778091585287e+19, |
|
"train_loss": 0.12756322894849637, |
|
"train_runtime": 3434.3534, |
|
"train_samples_per_second": 10.389, |
|
"train_steps_per_second": 0.649 |
|
}, |
|
{ |
|
"epoch": 11.025112107623318, |
|
"eval_accuracy": 0.958974358974359, |
|
"eval_loss": 0.1425883024930954, |
|
"eval_runtime": 29.0317, |
|
"eval_samples_per_second": 6.717, |
|
"eval_steps_per_second": 0.861, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 11.025112107623318, |
|
"eval_accuracy": 0.958974358974359, |
|
"eval_loss": 0.14259304106235504, |
|
"eval_runtime": 28.0432, |
|
"eval_samples_per_second": 6.954, |
|
"eval_steps_per_second": 0.891, |
|
"step": 672 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2230, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 5 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.336778091585287e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|