{
  "best_global_step": 288,
  "best_metric": 0.7926829268292683,
  "best_model_checkpoint": "ALL_RGBCROP_ori16F-8B16F-GACWD5lrDO/checkpoint-288",
  "epoch": 10.025,
  "eval_steps": 500,
  "global_step": 528,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.005208333333333333,
      "grad_norm": 6.546933174133301,
      "learning_rate": 4.6875000000000006e-07,
      "loss": 0.7185,
      "step": 10
    },
    {
      "epoch": 0.010416666666666666,
      "grad_norm": 6.9191694259643555,
      "learning_rate": 9.895833333333333e-07,
      "loss": 0.6892,
      "step": 20
    },
    {
      "epoch": 0.015625,
      "grad_norm": 6.326386451721191,
      "learning_rate": 1.5104166666666667e-06,
      "loss": 0.7244,
      "step": 30
    },
    {
      "epoch": 0.020833333333333332,
      "grad_norm": 8.42184829711914,
      "learning_rate": 2.0312500000000002e-06,
      "loss": 0.694,
      "step": 40
    },
    {
      "epoch": 0.025,
      "eval_accuracy": 0.49390243902439024,
      "eval_loss": 0.7127410769462585,
      "eval_runtime": 25.0192,
      "eval_samples_per_second": 6.555,
      "eval_steps_per_second": 0.839,
      "step": 48
    },
    {
      "epoch": 1.0010416666666666,
      "grad_norm": 6.573472499847412,
      "learning_rate": 2.5520833333333334e-06,
      "loss": 0.6915,
      "step": 50
    },
    {
      "epoch": 1.00625,
      "grad_norm": 6.270707130432129,
      "learning_rate": 3.072916666666667e-06,
      "loss": 0.6531,
      "step": 60
    },
    {
      "epoch": 1.0114583333333333,
      "grad_norm": 6.025201797485352,
      "learning_rate": 3.59375e-06,
      "loss": 0.632,
      "step": 70
    },
    {
      "epoch": 1.0166666666666666,
      "grad_norm": 6.816848278045654,
      "learning_rate": 4.114583333333334e-06,
      "loss": 0.6395,
      "step": 80
    },
    {
      "epoch": 1.021875,
      "grad_norm": 6.194951057434082,
      "learning_rate": 4.635416666666667e-06,
      "loss": 0.6395,
      "step": 90
    },
    {
      "epoch": 1.025,
      "eval_accuracy": 0.5975609756097561,
      "eval_loss": 0.6772211790084839,
      "eval_runtime": 25.1799,
      "eval_samples_per_second": 6.513,
      "eval_steps_per_second": 0.834,
      "step": 96
    },
    {
      "epoch": 2.002083333333333,
      "grad_norm": 6.220993995666504,
      "learning_rate": 5.156250000000001e-06,
      "loss": 0.6222,
      "step": 100
    },
    {
      "epoch": 2.0072916666666667,
      "grad_norm": 6.413397312164307,
      "learning_rate": 5.677083333333334e-06,
      "loss": 0.6061,
      "step": 110
    },
    {
      "epoch": 2.0125,
      "grad_norm": 7.089827537536621,
      "learning_rate": 6.197916666666667e-06,
      "loss": 0.5924,
      "step": 120
    },
    {
      "epoch": 2.017708333333333,
      "grad_norm": 6.679469108581543,
      "learning_rate": 6.718750000000001e-06,
      "loss": 0.5323,
      "step": 130
    },
    {
      "epoch": 2.0229166666666667,
      "grad_norm": 5.224033832550049,
      "learning_rate": 7.239583333333334e-06,
      "loss": 0.4871,
      "step": 140
    },
    {
      "epoch": 2.025,
      "eval_accuracy": 0.6707317073170732,
      "eval_loss": 0.6142382025718689,
      "eval_runtime": 25.3499,
      "eval_samples_per_second": 6.469,
      "eval_steps_per_second": 0.828,
      "step": 144
    },
    {
      "epoch": 3.003125,
      "grad_norm": 6.379062652587891,
      "learning_rate": 7.760416666666666e-06,
      "loss": 0.4493,
      "step": 150
    },
    {
      "epoch": 3.0083333333333333,
      "grad_norm": 5.42802619934082,
      "learning_rate": 8.281250000000001e-06,
      "loss": 0.4371,
      "step": 160
    },
    {
      "epoch": 3.013541666666667,
      "grad_norm": 6.1493096351623535,
      "learning_rate": 8.802083333333335e-06,
      "loss": 0.402,
      "step": 170
    },
    {
      "epoch": 3.01875,
      "grad_norm": 5.070779323577881,
      "learning_rate": 9.322916666666667e-06,
      "loss": 0.4197,
      "step": 180
    },
    {
      "epoch": 3.0239583333333333,
      "grad_norm": 6.595272064208984,
      "learning_rate": 9.84375e-06,
      "loss": 0.4162,
      "step": 190
    },
    {
      "epoch": 3.025,
      "eval_accuracy": 0.725609756097561,
      "eval_loss": 0.5417684316635132,
      "eval_runtime": 24.8265,
      "eval_samples_per_second": 6.606,
      "eval_steps_per_second": 0.846,
      "step": 192
    },
    {
      "epoch": 4.004166666666666,
      "grad_norm": 12.621065139770508,
      "learning_rate": 9.959490740740742e-06,
      "loss": 0.3567,
      "step": 200
    },
    {
      "epoch": 4.009375,
      "grad_norm": 7.9822893142700195,
      "learning_rate": 9.901620370370372e-06,
      "loss": 0.319,
      "step": 210
    },
    {
      "epoch": 4.014583333333333,
      "grad_norm": 5.883790969848633,
      "learning_rate": 9.84375e-06,
      "loss": 0.2885,
      "step": 220
    },
    {
      "epoch": 4.019791666666666,
      "grad_norm": 10.191251754760742,
      "learning_rate": 9.78587962962963e-06,
      "loss": 0.2845,
      "step": 230
    },
    {
      "epoch": 4.025,
      "grad_norm": 11.520015716552734,
      "learning_rate": 9.72800925925926e-06,
      "loss": 0.2336,
      "step": 240
    },
    {
      "epoch": 4.025,
      "eval_accuracy": 0.7621951219512195,
      "eval_loss": 0.5030018091201782,
      "eval_runtime": 25.1022,
      "eval_samples_per_second": 6.533,
      "eval_steps_per_second": 0.837,
      "step": 240
    },
    {
      "epoch": 5.005208333333333,
      "grad_norm": 9.863582611083984,
      "learning_rate": 9.670138888888889e-06,
      "loss": 0.2117,
      "step": 250
    },
    {
      "epoch": 5.010416666666667,
      "grad_norm": 9.696749687194824,
      "learning_rate": 9.612268518518519e-06,
      "loss": 0.2162,
      "step": 260
    },
    {
      "epoch": 5.015625,
      "grad_norm": 5.5694580078125,
      "learning_rate": 9.554398148148149e-06,
      "loss": 0.224,
      "step": 270
    },
    {
      "epoch": 5.020833333333333,
      "grad_norm": 2.6210453510284424,
      "learning_rate": 9.496527777777779e-06,
      "loss": 0.159,
      "step": 280
    },
    {
      "epoch": 5.025,
      "eval_accuracy": 0.7926829268292683,
      "eval_loss": 0.5044757723808289,
      "eval_runtime": 23.3535,
      "eval_samples_per_second": 7.023,
      "eval_steps_per_second": 0.899,
      "step": 288
    },
    {
      "epoch": 6.001041666666667,
      "grad_norm": 7.435839653015137,
      "learning_rate": 9.438657407407409e-06,
      "loss": 0.1674,
      "step": 290
    },
    {
      "epoch": 6.00625,
      "grad_norm": 39.46736526489258,
      "learning_rate": 9.380787037037039e-06,
      "loss": 0.1278,
      "step": 300
    },
    {
      "epoch": 6.011458333333334,
      "grad_norm": 1.9832502603530884,
      "learning_rate": 9.322916666666667e-06,
      "loss": 0.1017,
      "step": 310
    },
    {
      "epoch": 6.016666666666667,
      "grad_norm": 7.218878269195557,
      "learning_rate": 9.265046296296297e-06,
      "loss": 0.1574,
      "step": 320
    },
    {
      "epoch": 6.021875,
      "grad_norm": 6.560127258300781,
      "learning_rate": 9.207175925925927e-06,
      "loss": 0.1486,
      "step": 330
    },
    {
      "epoch": 6.025,
      "eval_accuracy": 0.7804878048780488,
      "eval_loss": 0.5186034440994263,
      "eval_runtime": 25.0796,
      "eval_samples_per_second": 6.539,
      "eval_steps_per_second": 0.837,
      "step": 336
    },
    {
      "epoch": 7.002083333333333,
      "grad_norm": 1.7817628383636475,
      "learning_rate": 9.149305555555557e-06,
      "loss": 0.0987,
      "step": 340
    },
    {
      "epoch": 7.007291666666666,
      "grad_norm": 10.60669994354248,
      "learning_rate": 9.091435185185185e-06,
      "loss": 0.1023,
      "step": 350
    },
    {
      "epoch": 7.0125,
      "grad_norm": 0.7290067076683044,
      "learning_rate": 9.033564814814815e-06,
      "loss": 0.097,
      "step": 360
    },
    {
      "epoch": 7.017708333333333,
      "grad_norm": 5.742852687835693,
      "learning_rate": 8.975694444444445e-06,
      "loss": 0.1295,
      "step": 370
    },
    {
      "epoch": 7.022916666666666,
      "grad_norm": 1.4199665784835815,
      "learning_rate": 8.917824074074075e-06,
      "loss": 0.0997,
      "step": 380
    },
    {
      "epoch": 7.025,
      "eval_accuracy": 0.7865853658536586,
      "eval_loss": 0.5648921728134155,
      "eval_runtime": 25.5274,
      "eval_samples_per_second": 6.424,
      "eval_steps_per_second": 0.823,
      "step": 384
    },
    {
      "epoch": 8.003125,
      "grad_norm": 1.0618127584457397,
      "learning_rate": 8.859953703703705e-06,
      "loss": 0.0975,
      "step": 390
    },
    {
      "epoch": 8.008333333333333,
      "grad_norm": 0.4267812967300415,
      "learning_rate": 8.802083333333335e-06,
      "loss": 0.0738,
      "step": 400
    },
    {
      "epoch": 8.013541666666667,
      "grad_norm": 16.63909339904785,
      "learning_rate": 8.744212962962963e-06,
      "loss": 0.0395,
      "step": 410
    },
    {
      "epoch": 8.01875,
      "grad_norm": 16.013010025024414,
      "learning_rate": 8.686342592592593e-06,
      "loss": 0.0566,
      "step": 420
    },
    {
      "epoch": 8.023958333333333,
      "grad_norm": 3.0400283336639404,
      "learning_rate": 8.628472222222223e-06,
      "loss": 0.07,
      "step": 430
    },
    {
      "epoch": 8.025,
      "eval_accuracy": 0.7804878048780488,
      "eval_loss": 0.6179517507553101,
      "eval_runtime": 24.5292,
      "eval_samples_per_second": 6.686,
      "eval_steps_per_second": 0.856,
      "step": 432
    },
    {
      "epoch": 9.004166666666666,
      "grad_norm": 14.060826301574707,
      "learning_rate": 8.570601851851853e-06,
      "loss": 0.0566,
      "step": 440
    },
    {
      "epoch": 9.009375,
      "grad_norm": 5.963779926300049,
      "learning_rate": 8.512731481481481e-06,
      "loss": 0.0421,
      "step": 450
    },
    {
      "epoch": 9.014583333333333,
      "grad_norm": 1.6425102949142456,
      "learning_rate": 8.454861111111111e-06,
      "loss": 0.0395,
      "step": 460
    },
    {
      "epoch": 9.019791666666666,
      "grad_norm": 7.537419319152832,
      "learning_rate": 8.396990740740741e-06,
      "loss": 0.0402,
      "step": 470
    },
    {
      "epoch": 9.025,
      "grad_norm": 17.922319412231445,
      "learning_rate": 8.339120370370371e-06,
      "loss": 0.0377,
      "step": 480
    },
    {
      "epoch": 9.025,
      "eval_accuracy": 0.7926829268292683,
      "eval_loss": 0.6363745927810669,
      "eval_runtime": 24.3896,
      "eval_samples_per_second": 6.724,
      "eval_steps_per_second": 0.861,
      "step": 480
    },
    {
      "epoch": 10.005208333333334,
      "grad_norm": 0.31738758087158203,
      "learning_rate": 8.281250000000001e-06,
      "loss": 0.0276,
      "step": 490
    },
    {
      "epoch": 10.010416666666666,
      "grad_norm": 2.9940338134765625,
      "learning_rate": 8.22337962962963e-06,
      "loss": 0.0255,
      "step": 500
    },
    {
      "epoch": 10.015625,
      "grad_norm": 24.363996505737305,
      "learning_rate": 8.16550925925926e-06,
      "loss": 0.0269,
      "step": 510
    },
    {
      "epoch": 10.020833333333334,
      "grad_norm": 0.5430907011032104,
      "learning_rate": 8.10763888888889e-06,
      "loss": 0.0103,
      "step": 520
    },
    {
      "epoch": 10.025,
      "eval_accuracy": 0.7865853658536586,
      "eval_loss": 0.7102433443069458,
      "eval_runtime": 24.5626,
      "eval_samples_per_second": 6.677,
      "eval_steps_per_second": 0.855,
      "step": 528
    },
    {
      "epoch": 10.025,
      "step": 528,
      "total_flos": 1.0526753651857293e+19,
      "train_loss": 0.2883184655625938,
      "train_runtime": 2976.1505,
      "train_samples_per_second": 10.322,
      "train_steps_per_second": 0.645
    },
    {
      "epoch": 10.025,
      "eval_accuracy": 0.8143712574850299,
      "eval_loss": 0.40913674235343933,
      "eval_runtime": 27.9503,
      "eval_samples_per_second": 5.975,
      "eval_steps_per_second": 0.751,
      "step": 528
    },
    {
      "epoch": 10.025,
      "eval_accuracy": 0.8143712574850299,
      "eval_loss": 0.4080381691455841,
      "eval_runtime": 26.7248,
      "eval_samples_per_second": 6.249,
      "eval_steps_per_second": 0.786,
      "step": 528
    },
    {
      "epoch": 10.025,
      "eval_accuracy": 0.8143712574850299,
      "eval_loss": 0.4105586111545563,
      "eval_runtime": 25.8358,
      "eval_samples_per_second": 6.464,
      "eval_steps_per_second": 0.813,
      "step": 528
    },
    {
      "epoch": 10.025,
      "eval_accuracy": 0.8143712574850299,
      "eval_loss": 0.40935227274894714,
      "eval_runtime": 27.3841,
      "eval_samples_per_second": 6.098,
      "eval_steps_per_second": 0.767,
      "step": 528
    },
    {
      "epoch": 10.025,
      "eval_accuracy": 0.8143712574850299,
      "eval_loss": 0.40921783447265625,
      "eval_runtime": 26.5963,
      "eval_samples_per_second": 6.279,
      "eval_steps_per_second": 0.79,
      "step": 528
    }
  ],
  "logging_steps": 10,
  "max_steps": 1920,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 5
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0526753651857293e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}