{ "best_metric": 0.92511454202441, "best_model_checkpoint": "vit-msn-small-wbc-blur-detector/checkpoint-1040", "epoch": 5.0, "eval_steps": 500, "global_step": 1040, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04807692307692308, "grad_norm": 8.808553695678711, "learning_rate": 4.807692307692308e-06, "loss": 0.5649, "step": 10 }, { "epoch": 0.09615384615384616, "grad_norm": 10.409209251403809, "learning_rate": 9.615384615384616e-06, "loss": 0.5411, "step": 20 }, { "epoch": 0.14423076923076922, "grad_norm": 11.638080596923828, "learning_rate": 1.4423076923076923e-05, "loss": 0.4686, "step": 30 }, { "epoch": 0.19230769230769232, "grad_norm": 8.448880195617676, "learning_rate": 1.923076923076923e-05, "loss": 0.467, "step": 40 }, { "epoch": 0.2403846153846154, "grad_norm": 10.3389253616333, "learning_rate": 2.4038461538461542e-05, "loss": 0.4375, "step": 50 }, { "epoch": 0.28846153846153844, "grad_norm": 14.958161354064941, "learning_rate": 2.8846153846153845e-05, "loss": 0.4374, "step": 60 }, { "epoch": 0.33653846153846156, "grad_norm": 5.999127388000488, "learning_rate": 3.365384615384616e-05, "loss": 0.4244, "step": 70 }, { "epoch": 0.38461538461538464, "grad_norm": 15.743879318237305, "learning_rate": 3.846153846153846e-05, "loss": 0.4003, "step": 80 }, { "epoch": 0.4326923076923077, "grad_norm": 11.51312255859375, "learning_rate": 4.326923076923077e-05, "loss": 0.4035, "step": 90 }, { "epoch": 0.4807692307692308, "grad_norm": 11.792448043823242, "learning_rate": 4.8076923076923084e-05, "loss": 0.3758, "step": 100 }, { "epoch": 0.5288461538461539, "grad_norm": 16.453128814697266, "learning_rate": 4.9679487179487185e-05, "loss": 0.4207, "step": 110 }, { "epoch": 0.5769230769230769, "grad_norm": 12.526341438293457, "learning_rate": 4.9145299145299147e-05, "loss": 0.4373, "step": 120 }, { "epoch": 0.625, "grad_norm": 14.725676536560059, "learning_rate": 4.8611111111111115e-05, "loss": 0.3985, "step": 130 }, { "epoch": 0.6730769230769231, "grad_norm": 8.387413024902344, "learning_rate": 4.8076923076923084e-05, "loss": 0.3891, "step": 140 }, { "epoch": 0.7211538461538461, "grad_norm": 9.76562213897705, "learning_rate": 4.7542735042735045e-05, "loss": 0.3595, "step": 150 }, { "epoch": 0.7692307692307693, "grad_norm": 6.494562149047852, "learning_rate": 4.700854700854701e-05, "loss": 0.3769, "step": 160 }, { "epoch": 0.8173076923076923, "grad_norm": 4.907495021820068, "learning_rate": 4.6474358974358976e-05, "loss": 0.3606, "step": 170 }, { "epoch": 0.8653846153846154, "grad_norm": 10.716683387756348, "learning_rate": 4.594017094017094e-05, "loss": 0.4022, "step": 180 }, { "epoch": 0.9134615384615384, "grad_norm": 4.651486873626709, "learning_rate": 4.5405982905982906e-05, "loss": 0.3586, "step": 190 }, { "epoch": 0.9615384615384616, "grad_norm": 9.122421264648438, "learning_rate": 4.4871794871794874e-05, "loss": 0.3471, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.8939668116890617, "eval_loss": 0.29600855708122253, "eval_runtime": 79.6532, "eval_samples_per_second": 326.076, "eval_steps_per_second": 5.097, "step": 208 }, { "epoch": 1.0096153846153846, "grad_norm": 4.985302448272705, "learning_rate": 4.4337606837606836e-05, "loss": 0.3561, "step": 210 }, { "epoch": 1.0576923076923077, "grad_norm": 8.867368698120117, "learning_rate": 4.3803418803418805e-05, "loss": 0.3629, "step": 220 }, { "epoch": 1.1057692307692308, "grad_norm": 5.275181770324707, "learning_rate": 4.326923076923077e-05, "loss": 0.3352, "step": 230 }, { "epoch": 1.1538461538461537, "grad_norm": 7.328935146331787, "learning_rate": 4.2735042735042735e-05, "loss": 0.3593, "step": 240 }, { "epoch": 1.2019230769230769, "grad_norm": 7.934610366821289, "learning_rate": 4.2200854700854704e-05, "loss": 0.3651, "step": 250 }, { "epoch": 1.25, "grad_norm": 4.802842140197754, "learning_rate": 4.166666666666667e-05, "loss": 0.3517, "step": 260 }, { "epoch": 1.2980769230769231, "grad_norm": 5.414445400238037, "learning_rate": 4.1132478632478634e-05, "loss": 0.3527, "step": 270 }, { "epoch": 1.3461538461538463, "grad_norm": 6.663522243499756, "learning_rate": 4.05982905982906e-05, "loss": 0.3397, "step": 280 }, { "epoch": 1.3942307692307692, "grad_norm": 4.669881820678711, "learning_rate": 4.006410256410257e-05, "loss": 0.3378, "step": 290 }, { "epoch": 1.4423076923076923, "grad_norm": 5.708807945251465, "learning_rate": 3.952991452991453e-05, "loss": 0.3119, "step": 300 }, { "epoch": 1.4903846153846154, "grad_norm": 5.911713600158691, "learning_rate": 3.89957264957265e-05, "loss": 0.329, "step": 310 }, { "epoch": 1.5384615384615383, "grad_norm": 4.569310188293457, "learning_rate": 3.846153846153846e-05, "loss": 0.3348, "step": 320 }, { "epoch": 1.5865384615384617, "grad_norm": 6.366770267486572, "learning_rate": 3.7927350427350425e-05, "loss": 0.3268, "step": 330 }, { "epoch": 1.6346153846153846, "grad_norm": 4.477969646453857, "learning_rate": 3.739316239316239e-05, "loss": 0.3327, "step": 340 }, { "epoch": 1.6826923076923077, "grad_norm": 5.906105995178223, "learning_rate": 3.685897435897436e-05, "loss": 0.316, "step": 350 }, { "epoch": 1.7307692307692308, "grad_norm": 4.78982400894165, "learning_rate": 3.6324786324786323e-05, "loss": 0.339, "step": 360 }, { "epoch": 1.7788461538461537, "grad_norm": 4.8963117599487305, "learning_rate": 3.579059829059829e-05, "loss": 0.3247, "step": 370 }, { "epoch": 1.8269230769230769, "grad_norm": 4.7771501541137695, "learning_rate": 3.525641025641026e-05, "loss": 0.3168, "step": 380 }, { "epoch": 1.875, "grad_norm": 5.485278606414795, "learning_rate": 3.472222222222222e-05, "loss": 0.3107, "step": 390 }, { "epoch": 1.9230769230769231, "grad_norm": 5.954066753387451, "learning_rate": 3.418803418803419e-05, "loss": 0.3196, "step": 400 }, { "epoch": 1.9711538461538463, "grad_norm": 4.826352119445801, "learning_rate": 3.365384615384616e-05, "loss": 0.3113, "step": 410 }, { "epoch": 2.0, "eval_accuracy": 0.9087898972009394, "eval_loss": 0.2550640106201172, "eval_runtime": 77.7682, "eval_samples_per_second": 333.98, "eval_steps_per_second": 5.221, "step": 416 }, { "epoch": 2.019230769230769, "grad_norm": 4.100191116333008, "learning_rate": 3.311965811965812e-05, "loss": 0.3039, "step": 420 }, { "epoch": 2.0673076923076925, "grad_norm": 5.670529842376709, "learning_rate": 3.258547008547009e-05, "loss": 0.3228, "step": 430 }, { "epoch": 2.1153846153846154, "grad_norm": 4.010565757751465, "learning_rate": 3.205128205128206e-05, "loss": 0.3096, "step": 440 }, { "epoch": 2.1634615384615383, "grad_norm": 5.74422550201416, "learning_rate": 3.151709401709402e-05, "loss": 0.3119, "step": 450 }, { "epoch": 2.2115384615384617, "grad_norm": 3.280287742614746, "learning_rate": 3.098290598290599e-05, "loss": 0.3305, "step": 460 }, { "epoch": 2.2596153846153846, "grad_norm": 4.927590847015381, "learning_rate": 3.0448717948717947e-05, "loss": 0.3034, "step": 470 }, { "epoch": 2.3076923076923075, "grad_norm": 3.297640562057495, "learning_rate": 2.9914529914529915e-05, "loss": 0.3081, "step": 480 }, { "epoch": 2.355769230769231, "grad_norm": 4.635345458984375, "learning_rate": 2.9380341880341884e-05, "loss": 0.3096, "step": 490 }, { "epoch": 2.4038461538461537, "grad_norm": 4.222609519958496, "learning_rate": 2.8846153846153845e-05, "loss": 0.3242, "step": 500 }, { "epoch": 2.451923076923077, "grad_norm": 5.433651924133301, "learning_rate": 2.8311965811965814e-05, "loss": 0.2856, "step": 510 }, { "epoch": 2.5, "grad_norm": 3.512269973754883, "learning_rate": 2.777777777777778e-05, "loss": 0.3165, "step": 520 }, { "epoch": 2.5480769230769234, "grad_norm": 4.383204936981201, "learning_rate": 2.724358974358974e-05, "loss": 0.3309, "step": 530 }, { "epoch": 2.5961538461538463, "grad_norm": 3.9306111335754395, "learning_rate": 2.670940170940171e-05, "loss": 0.3012, "step": 540 }, { "epoch": 2.644230769230769, "grad_norm": 4.230567455291748, "learning_rate": 2.6175213675213678e-05, "loss": 0.2931, "step": 550 }, { "epoch": 2.6923076923076925, "grad_norm": 4.904375076293945, "learning_rate": 2.564102564102564e-05, "loss": 0.3042, "step": 560 }, { "epoch": 2.7403846153846154, "grad_norm": 4.499465465545654, "learning_rate": 2.5106837606837608e-05, "loss": 0.2904, "step": 570 }, { "epoch": 2.7884615384615383, "grad_norm": 4.0428242683410645, "learning_rate": 2.4572649572649573e-05, "loss": 0.3019, "step": 580 }, { "epoch": 2.8365384615384617, "grad_norm": 5.506063938140869, "learning_rate": 2.4038461538461542e-05, "loss": 0.2976, "step": 590 }, { "epoch": 2.8846153846153846, "grad_norm": 3.252035617828369, "learning_rate": 2.3504273504273504e-05, "loss": 0.2874, "step": 600 }, { "epoch": 2.9326923076923075, "grad_norm": 3.8732314109802246, "learning_rate": 2.297008547008547e-05, "loss": 0.3022, "step": 610 }, { "epoch": 2.980769230769231, "grad_norm": 4.38186502456665, "learning_rate": 2.2435897435897437e-05, "loss": 0.3104, "step": 620 }, { "epoch": 3.0, "eval_accuracy": 0.9212258884225927, "eval_loss": 0.21063366532325745, "eval_runtime": 77.9718, "eval_samples_per_second": 333.107, "eval_steps_per_second": 5.207, "step": 624 }, { "epoch": 3.0288461538461537, "grad_norm": 3.352865695953369, "learning_rate": 2.1901709401709402e-05, "loss": 0.2898, "step": 630 }, { "epoch": 3.076923076923077, "grad_norm": 4.9008402824401855, "learning_rate": 2.1367521367521368e-05, "loss": 0.326, "step": 640 }, { "epoch": 3.125, "grad_norm": 5.130583763122559, "learning_rate": 2.0833333333333336e-05, "loss": 0.293, "step": 650 }, { "epoch": 3.173076923076923, "grad_norm": 4.387807369232178, "learning_rate": 2.02991452991453e-05, "loss": 0.2777, "step": 660 }, { "epoch": 3.2211538461538463, "grad_norm": 5.279751300811768, "learning_rate": 1.9764957264957266e-05, "loss": 0.3026, "step": 670 }, { "epoch": 3.269230769230769, "grad_norm": 3.5947065353393555, "learning_rate": 1.923076923076923e-05, "loss": 0.2881, "step": 680 }, { "epoch": 3.3173076923076925, "grad_norm": 3.5021965503692627, "learning_rate": 1.8696581196581197e-05, "loss": 0.277, "step": 690 }, { "epoch": 3.3653846153846154, "grad_norm": 5.785918712615967, "learning_rate": 1.8162393162393162e-05, "loss": 0.3025, "step": 700 }, { "epoch": 3.4134615384615383, "grad_norm": 3.405780553817749, "learning_rate": 1.762820512820513e-05, "loss": 0.2825, "step": 710 }, { "epoch": 3.4615384615384617, "grad_norm": 3.8835082054138184, "learning_rate": 1.7094017094017095e-05, "loss": 0.2754, "step": 720 }, { "epoch": 3.5096153846153846, "grad_norm": 3.6545193195343018, "learning_rate": 1.655982905982906e-05, "loss": 0.2736, "step": 730 }, { "epoch": 3.5576923076923075, "grad_norm": 3.788748025894165, "learning_rate": 1.602564102564103e-05, "loss": 0.3084, "step": 740 }, { "epoch": 3.605769230769231, "grad_norm": 3.0023396015167236, "learning_rate": 1.5491452991452994e-05, "loss": 0.2797, "step": 750 }, { "epoch": 3.6538461538461537, "grad_norm": 3.6981606483459473, "learning_rate": 1.4957264957264958e-05, "loss": 0.278, "step": 760 }, { "epoch": 3.7019230769230766, "grad_norm": 4.383445739746094, "learning_rate": 1.4423076923076923e-05, "loss": 0.2602, "step": 770 }, { "epoch": 3.75, "grad_norm": 4.581905364990234, "learning_rate": 1.388888888888889e-05, "loss": 0.2877, "step": 780 }, { "epoch": 3.7980769230769234, "grad_norm": 4.475015640258789, "learning_rate": 1.3354700854700855e-05, "loss": 0.2733, "step": 790 }, { "epoch": 3.8461538461538463, "grad_norm": 4.552759170532227, "learning_rate": 1.282051282051282e-05, "loss": 0.2927, "step": 800 }, { "epoch": 3.894230769230769, "grad_norm": 4.798213958740234, "learning_rate": 1.2286324786324787e-05, "loss": 0.2756, "step": 810 }, { "epoch": 3.9423076923076925, "grad_norm": 3.405367136001587, "learning_rate": 1.1752136752136752e-05, "loss": 0.2893, "step": 820 }, { "epoch": 3.9903846153846154, "grad_norm": 5.285249710083008, "learning_rate": 1.1217948717948719e-05, "loss": 0.2855, "step": 830 }, { "epoch": 4.0, "eval_accuracy": 0.9221114234012243, "eval_loss": 0.21014131605625153, "eval_runtime": 78.3645, "eval_samples_per_second": 331.438, "eval_steps_per_second": 5.181, "step": 832 }, { "epoch": 4.038461538461538, "grad_norm": 4.0211920738220215, "learning_rate": 1.0683760683760684e-05, "loss": 0.2639, "step": 840 }, { "epoch": 4.086538461538462, "grad_norm": 3.0746471881866455, "learning_rate": 1.014957264957265e-05, "loss": 0.2866, "step": 850 }, { "epoch": 4.134615384615385, "grad_norm": 3.945941686630249, "learning_rate": 9.615384615384616e-06, "loss": 0.2624, "step": 860 }, { "epoch": 4.1826923076923075, "grad_norm": 3.996899366378784, "learning_rate": 9.081196581196581e-06, "loss": 0.2795, "step": 870 }, { "epoch": 4.230769230769231, "grad_norm": 3.078308582305908, "learning_rate": 8.547008547008548e-06, "loss": 0.2662, "step": 880 }, { "epoch": 4.278846153846154, "grad_norm": 4.386013507843018, "learning_rate": 8.012820512820515e-06, "loss": 0.2607, "step": 890 }, { "epoch": 4.326923076923077, "grad_norm": 4.05147647857666, "learning_rate": 7.478632478632479e-06, "loss": 0.2837, "step": 900 }, { "epoch": 4.375, "grad_norm": 3.7638943195343018, "learning_rate": 6.944444444444445e-06, "loss": 0.2588, "step": 910 }, { "epoch": 4.423076923076923, "grad_norm": 3.4808619022369385, "learning_rate": 6.41025641025641e-06, "loss": 0.2709, "step": 920 }, { "epoch": 4.471153846153846, "grad_norm": 4.455379486083984, "learning_rate": 5.876068376068376e-06, "loss": 0.2743, "step": 930 }, { "epoch": 4.519230769230769, "grad_norm": 3.4500174522399902, "learning_rate": 5.341880341880342e-06, "loss": 0.2501, "step": 940 }, { "epoch": 4.5673076923076925, "grad_norm": 3.780879020690918, "learning_rate": 4.807692307692308e-06, "loss": 0.2666, "step": 950 }, { "epoch": 4.615384615384615, "grad_norm": 3.8015003204345703, "learning_rate": 4.273504273504274e-06, "loss": 0.2516, "step": 960 }, { "epoch": 4.663461538461538, "grad_norm": 3.866867780685425, "learning_rate": 3.7393162393162394e-06, "loss": 0.2673, "step": 970 }, { "epoch": 4.711538461538462, "grad_norm": 3.153127431869507, "learning_rate": 3.205128205128205e-06, "loss": 0.2472, "step": 980 }, { "epoch": 4.759615384615385, "grad_norm": 3.0481035709381104, "learning_rate": 2.670940170940171e-06, "loss": 0.2669, "step": 990 }, { "epoch": 4.8076923076923075, "grad_norm": 3.5989151000976562, "learning_rate": 2.136752136752137e-06, "loss": 0.2748, "step": 1000 }, { "epoch": 4.855769230769231, "grad_norm": 4.3564839363098145, "learning_rate": 1.6025641025641025e-06, "loss": 0.2651, "step": 1010 }, { "epoch": 4.903846153846154, "grad_norm": 3.6109776496887207, "learning_rate": 1.0683760683760685e-06, "loss": 0.2631, "step": 1020 }, { "epoch": 4.951923076923077, "grad_norm": 3.1704578399658203, "learning_rate": 5.341880341880342e-07, "loss": 0.2665, "step": 1030 }, { "epoch": 5.0, "grad_norm": 3.165510654449463, "learning_rate": 0.0, "loss": 0.2497, "step": 1040 }, { "epoch": 5.0, "eval_accuracy": 0.92511454202441, "eval_loss": 0.20453155040740967, "eval_runtime": 78.1441, "eval_samples_per_second": 332.373, "eval_steps_per_second": 5.196, "step": 1040 }, { "epoch": 5.0, "step": 1040, "total_flos": 5.20812700794667e+18, "train_loss": 0.32158288405491753, "train_runtime": 2381.0335, "train_samples_per_second": 111.775, "train_steps_per_second": 0.437 } ], "logging_steps": 10, "max_steps": 1040, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.20812700794667e+18, "train_batch_size": 64, "trial_name": null, "trial_params": null }