{
  "best_metric": 0.9829629629629629,
  "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-1011",
  "epoch": 2.9955555555555557,
  "eval_steps": 500,
  "global_step": 1011,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02962962962962963,
      "grad_norm": 6.962750434875488,
      "learning_rate": 4.901960784313726e-06,
      "loss": 2.2752,
      "step": 10
    },
    {
      "epoch": 0.05925925925925926,
      "grad_norm": 7.147613048553467,
      "learning_rate": 9.803921568627451e-06,
      "loss": 2.1844,
      "step": 20
    },
    {
      "epoch": 0.08888888888888889,
      "grad_norm": 8.060157775878906,
      "learning_rate": 1.4705882352941177e-05,
      "loss": 1.9996,
      "step": 30
    },
    {
      "epoch": 0.11851851851851852,
      "grad_norm": 10.434203147888184,
      "learning_rate": 1.9607843137254903e-05,
      "loss": 1.692,
      "step": 40
    },
    {
      "epoch": 0.14814814814814814,
      "grad_norm": 14.684704780578613,
      "learning_rate": 2.4509803921568626e-05,
      "loss": 1.2463,
      "step": 50
    },
    {
      "epoch": 0.17777777777777778,
      "grad_norm": 15.639288902282715,
      "learning_rate": 2.9411764705882354e-05,
      "loss": 0.8793,
      "step": 60
    },
    {
      "epoch": 0.2074074074074074,
      "grad_norm": 14.043598175048828,
      "learning_rate": 3.431372549019608e-05,
      "loss": 0.6109,
      "step": 70
    },
    {
      "epoch": 0.23703703703703705,
      "grad_norm": 26.815710067749023,
      "learning_rate": 3.9215686274509805e-05,
      "loss": 0.5877,
      "step": 80
    },
    {
      "epoch": 0.26666666666666666,
      "grad_norm": 18.923198699951172,
      "learning_rate": 4.411764705882353e-05,
      "loss": 0.4282,
      "step": 90
    },
    {
      "epoch": 0.2962962962962963,
      "grad_norm": 21.275344848632812,
      "learning_rate": 4.901960784313725e-05,
      "loss": 0.4887,
      "step": 100
    },
    {
      "epoch": 0.32592592592592595,
      "grad_norm": 58.36842346191406,
      "learning_rate": 4.955995599559956e-05,
      "loss": 0.3896,
      "step": 110
    },
    {
      "epoch": 0.35555555555555557,
      "grad_norm": 18.006397247314453,
      "learning_rate": 4.9009900990099014e-05,
      "loss": 0.4162,
      "step": 120
    },
    {
      "epoch": 0.3851851851851852,
      "grad_norm": 13.68799114227295,
      "learning_rate": 4.845984598459846e-05,
      "loss": 0.3605,
      "step": 130
    },
    {
      "epoch": 0.4148148148148148,
      "grad_norm": 15.165467262268066,
      "learning_rate": 4.790979097909791e-05,
      "loss": 0.4391,
      "step": 140
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 10.881718635559082,
      "learning_rate": 4.7359735973597365e-05,
      "loss": 0.3706,
      "step": 150
    },
    {
      "epoch": 0.4740740740740741,
      "grad_norm": 15.909368515014648,
      "learning_rate": 4.680968096809682e-05,
      "loss": 0.3148,
      "step": 160
    },
    {
      "epoch": 0.5037037037037037,
      "grad_norm": 25.3117733001709,
      "learning_rate": 4.6259625962596256e-05,
      "loss": 0.3611,
      "step": 170
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 10.446706771850586,
      "learning_rate": 4.570957095709571e-05,
      "loss": 0.345,
      "step": 180
    },
    {
      "epoch": 0.562962962962963,
      "grad_norm": 15.340864181518555,
      "learning_rate": 4.515951595159516e-05,
      "loss": 0.3645,
      "step": 190
    },
    {
      "epoch": 0.5925925925925926,
      "grad_norm": 21.318401336669922,
      "learning_rate": 4.4609460946094614e-05,
      "loss": 0.3063,
      "step": 200
    },
    {
      "epoch": 0.6222222222222222,
      "grad_norm": 13.705307006835938,
      "learning_rate": 4.405940594059406e-05,
      "loss": 0.2603,
      "step": 210
    },
    {
      "epoch": 0.6518518518518519,
      "grad_norm": 14.29479694366455,
      "learning_rate": 4.350935093509351e-05,
      "loss": 0.2617,
      "step": 220
    },
    {
      "epoch": 0.6814814814814815,
      "grad_norm": 15.511056900024414,
      "learning_rate": 4.2959295929592964e-05,
      "loss": 0.2583,
      "step": 230
    },
    {
      "epoch": 0.7111111111111111,
      "grad_norm": 42.817989349365234,
      "learning_rate": 4.240924092409242e-05,
      "loss": 0.3146,
      "step": 240
    },
    {
      "epoch": 0.7407407407407407,
      "grad_norm": 11.812172889709473,
      "learning_rate": 4.1859185918591856e-05,
      "loss": 0.2315,
      "step": 250
    },
    {
      "epoch": 0.7703703703703704,
      "grad_norm": 11.193636894226074,
      "learning_rate": 4.130913091309131e-05,
      "loss": 0.329,
      "step": 260
    },
    {
      "epoch": 0.8,
      "grad_norm": 12.144834518432617,
      "learning_rate": 4.075907590759076e-05,
      "loss": 0.3005,
      "step": 270
    },
    {
      "epoch": 0.8296296296296296,
      "grad_norm": 10.566153526306152,
      "learning_rate": 4.020902090209021e-05,
      "loss": 0.2443,
      "step": 280
    },
    {
      "epoch": 0.8592592592592593,
      "grad_norm": 10.785486221313477,
      "learning_rate": 3.965896589658966e-05,
      "loss": 0.242,
      "step": 290
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 14.177382469177246,
      "learning_rate": 3.910891089108911e-05,
      "loss": 0.2615,
      "step": 300
    },
    {
      "epoch": 0.9185185185185185,
      "grad_norm": 16.649311065673828,
      "learning_rate": 3.8558855885588564e-05,
      "loss": 0.257,
      "step": 310
    },
    {
      "epoch": 0.9481481481481482,
      "grad_norm": 11.974807739257812,
      "learning_rate": 3.800880088008801e-05,
      "loss": 0.2559,
      "step": 320
    },
    {
      "epoch": 0.9777777777777777,
      "grad_norm": 9.136080741882324,
      "learning_rate": 3.745874587458746e-05,
      "loss": 0.2381,
      "step": 330
    },
    {
      "epoch": 0.9985185185185185,
      "eval_accuracy": 0.9575925925925926,
      "eval_loss": 0.1251903772354126,
      "eval_runtime": 33.135,
      "eval_samples_per_second": 162.97,
      "eval_steps_per_second": 10.201,
      "step": 337
    },
    {
      "epoch": 1.0074074074074073,
      "grad_norm": 23.605918884277344,
      "learning_rate": 3.690869086908691e-05,
      "loss": 0.2405,
      "step": 340
    },
    {
      "epoch": 1.037037037037037,
      "grad_norm": 9.887730598449707,
      "learning_rate": 3.635863586358636e-05,
      "loss": 0.2412,
      "step": 350
    },
    {
      "epoch": 1.0666666666666667,
      "grad_norm": 12.197110176086426,
      "learning_rate": 3.580858085808581e-05,
      "loss": 0.2298,
      "step": 360
    },
    {
      "epoch": 1.0962962962962963,
      "grad_norm": 12.699515342712402,
      "learning_rate": 3.525852585258526e-05,
      "loss": 0.2001,
      "step": 370
    },
    {
      "epoch": 1.125925925925926,
      "grad_norm": 13.966781616210938,
      "learning_rate": 3.470847084708471e-05,
      "loss": 0.2106,
      "step": 380
    },
    {
      "epoch": 1.1555555555555554,
      "grad_norm": 20.575868606567383,
      "learning_rate": 3.415841584158416e-05,
      "loss": 0.2002,
      "step": 390
    },
    {
      "epoch": 1.1851851851851851,
      "grad_norm": 15.80282974243164,
      "learning_rate": 3.360836083608361e-05,
      "loss": 0.2532,
      "step": 400
    },
    {
      "epoch": 1.2148148148148148,
      "grad_norm": 18.04647445678711,
      "learning_rate": 3.305830583058306e-05,
      "loss": 0.2609,
      "step": 410
    },
    {
      "epoch": 1.2444444444444445,
      "grad_norm": 8.6199312210083,
      "learning_rate": 3.250825082508251e-05,
      "loss": 0.253,
      "step": 420
    },
    {
      "epoch": 1.2740740740740741,
      "grad_norm": 7.412827968597412,
      "learning_rate": 3.195819581958196e-05,
      "loss": 0.213,
      "step": 430
    },
    {
      "epoch": 1.3037037037037038,
      "grad_norm": 7.904267311096191,
      "learning_rate": 3.140814081408141e-05,
      "loss": 0.2584,
      "step": 440
    },
    {
      "epoch": 1.3333333333333333,
      "grad_norm": 18.86865234375,
      "learning_rate": 3.0858085808580864e-05,
      "loss": 0.227,
      "step": 450
    },
    {
      "epoch": 1.362962962962963,
      "grad_norm": 5.769145965576172,
      "learning_rate": 3.0308030803080313e-05,
      "loss": 0.2016,
      "step": 460
    },
    {
      "epoch": 1.3925925925925926,
      "grad_norm": 11.96108341217041,
      "learning_rate": 2.9757975797579756e-05,
      "loss": 0.1635,
      "step": 470
    },
    {
      "epoch": 1.4222222222222223,
      "grad_norm": 13.738970756530762,
      "learning_rate": 2.9207920792079208e-05,
      "loss": 0.1717,
      "step": 480
    },
    {
      "epoch": 1.4518518518518517,
      "grad_norm": 11.988739967346191,
      "learning_rate": 2.8657865786578657e-05,
      "loss": 0.2126,
      "step": 490
    },
    {
      "epoch": 1.4814814814814814,
      "grad_norm": 11.078845977783203,
      "learning_rate": 2.810781078107811e-05,
      "loss": 0.2282,
      "step": 500
    },
    {
      "epoch": 1.511111111111111,
      "grad_norm": 10.5954008102417,
      "learning_rate": 2.755775577557756e-05,
      "loss": 0.1351,
      "step": 510
    },
    {
      "epoch": 1.5407407407407407,
      "grad_norm": 15.503268241882324,
      "learning_rate": 2.700770077007701e-05,
      "loss": 0.2507,
      "step": 520
    },
    {
      "epoch": 1.5703703703703704,
      "grad_norm": 15.796453475952148,
      "learning_rate": 2.645764576457646e-05,
      "loss": 0.185,
      "step": 530
    },
    {
      "epoch": 1.6,
      "grad_norm": 11.540029525756836,
      "learning_rate": 2.5907590759075913e-05,
      "loss": 0.1605,
      "step": 540
    },
    {
      "epoch": 1.6296296296296298,
      "grad_norm": 13.062150001525879,
      "learning_rate": 2.5357535753575355e-05,
      "loss": 0.2464,
      "step": 550
    },
    {
      "epoch": 1.6592592592592592,
      "grad_norm": 11.136322021484375,
      "learning_rate": 2.480748074807481e-05,
      "loss": 0.2067,
      "step": 560
    },
    {
      "epoch": 1.6888888888888889,
      "grad_norm": 8.773016929626465,
      "learning_rate": 2.4257425742574257e-05,
      "loss": 0.229,
      "step": 570
    },
    {
      "epoch": 1.7185185185185186,
      "grad_norm": 8.625940322875977,
      "learning_rate": 2.370737073707371e-05,
      "loss": 0.1535,
      "step": 580
    },
    {
      "epoch": 1.748148148148148,
      "grad_norm": 6.024807929992676,
      "learning_rate": 2.3157315731573158e-05,
      "loss": 0.1205,
      "step": 590
    },
    {
      "epoch": 1.7777777777777777,
      "grad_norm": 9.04062557220459,
      "learning_rate": 2.2607260726072607e-05,
      "loss": 0.1724,
      "step": 600
    },
    {
      "epoch": 1.8074074074074074,
      "grad_norm": 20.10211753845215,
      "learning_rate": 2.2057205720572056e-05,
      "loss": 0.1504,
      "step": 610
    },
    {
      "epoch": 1.837037037037037,
      "grad_norm": 9.104300498962402,
      "learning_rate": 2.150715071507151e-05,
      "loss": 0.157,
      "step": 620
    },
    {
      "epoch": 1.8666666666666667,
      "grad_norm": 8.573586463928223,
      "learning_rate": 2.0957095709570958e-05,
      "loss": 0.2248,
      "step": 630
    },
    {
      "epoch": 1.8962962962962964,
      "grad_norm": 7.876155853271484,
      "learning_rate": 2.0407040704070407e-05,
      "loss": 0.1622,
      "step": 640
    },
    {
      "epoch": 1.925925925925926,
      "grad_norm": 6.861870288848877,
      "learning_rate": 1.9856985698569856e-05,
      "loss": 0.1846,
      "step": 650
    },
    {
      "epoch": 1.9555555555555557,
      "grad_norm": 8.942523956298828,
      "learning_rate": 1.930693069306931e-05,
      "loss": 0.1585,
      "step": 660
    },
    {
      "epoch": 1.9851851851851852,
      "grad_norm": 12.770540237426758,
      "learning_rate": 1.8756875687568758e-05,
      "loss": 0.1729,
      "step": 670
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9775925925925926,
      "eval_loss": 0.06562940031290054,
      "eval_runtime": 32.5381,
      "eval_samples_per_second": 165.959,
      "eval_steps_per_second": 10.388,
      "step": 675
    },
    {
      "epoch": 2.0148148148148146,
      "grad_norm": 8.93161678314209,
      "learning_rate": 1.8206820682068207e-05,
      "loss": 0.1736,
      "step": 680
    },
    {
      "epoch": 2.0444444444444443,
      "grad_norm": 8.148313522338867,
      "learning_rate": 1.765676567656766e-05,
      "loss": 0.1518,
      "step": 690
    },
    {
      "epoch": 2.074074074074074,
      "grad_norm": 9.238503456115723,
      "learning_rate": 1.7106710671067108e-05,
      "loss": 0.181,
      "step": 700
    },
    {
      "epoch": 2.1037037037037036,
      "grad_norm": 7.2876081466674805,
      "learning_rate": 1.6556655665566557e-05,
      "loss": 0.1277,
      "step": 710
    },
    {
      "epoch": 2.1333333333333333,
      "grad_norm": 14.298846244812012,
      "learning_rate": 1.6006600660066006e-05,
      "loss": 0.1439,
      "step": 720
    },
    {
      "epoch": 2.162962962962963,
      "grad_norm": 19.348224639892578,
      "learning_rate": 1.545654565456546e-05,
      "loss": 0.202,
      "step": 730
    },
    {
      "epoch": 2.1925925925925926,
      "grad_norm": 8.49962043762207,
      "learning_rate": 1.4906490649064908e-05,
      "loss": 0.1552,
      "step": 740
    },
    {
      "epoch": 2.2222222222222223,
      "grad_norm": 6.094121932983398,
      "learning_rate": 1.4356435643564355e-05,
      "loss": 0.177,
      "step": 750
    },
    {
      "epoch": 2.251851851851852,
      "grad_norm": 10.521448135375977,
      "learning_rate": 1.3806380638063806e-05,
      "loss": 0.1667,
      "step": 760
    },
    {
      "epoch": 2.2814814814814817,
      "grad_norm": 15.489738464355469,
      "learning_rate": 1.3256325632563257e-05,
      "loss": 0.1298,
      "step": 770
    },
    {
      "epoch": 2.311111111111111,
      "grad_norm": 11.301315307617188,
      "learning_rate": 1.2706270627062708e-05,
      "loss": 0.1701,
      "step": 780
    },
    {
      "epoch": 2.3407407407407406,
      "grad_norm": 8.533815383911133,
      "learning_rate": 1.2156215621562157e-05,
      "loss": 0.1708,
      "step": 790
    },
    {
      "epoch": 2.3703703703703702,
      "grad_norm": 13.924063682556152,
      "learning_rate": 1.1606160616061606e-05,
      "loss": 0.1394,
      "step": 800
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.552699089050293,
      "learning_rate": 1.1056105610561057e-05,
      "loss": 0.1553,
      "step": 810
    },
    {
      "epoch": 2.4296296296296296,
      "grad_norm": 11.489421844482422,
      "learning_rate": 1.0506050605060507e-05,
      "loss": 0.1479,
      "step": 820
    },
    {
      "epoch": 2.4592592592592593,
      "grad_norm": 11.2034330368042,
      "learning_rate": 9.955995599559956e-06,
      "loss": 0.1715,
      "step": 830
    },
    {
      "epoch": 2.488888888888889,
      "grad_norm": 6.7186737060546875,
      "learning_rate": 9.405940594059407e-06,
      "loss": 0.1441,
      "step": 840
    },
    {
      "epoch": 2.5185185185185186,
      "grad_norm": 7.652073383331299,
      "learning_rate": 8.855885588558856e-06,
      "loss": 0.1058,
      "step": 850
    },
    {
      "epoch": 2.5481481481481483,
      "grad_norm": 10.549448013305664,
      "learning_rate": 8.305830583058307e-06,
      "loss": 0.1485,
      "step": 860
    },
    {
      "epoch": 2.5777777777777775,
      "grad_norm": 7.53530740737915,
      "learning_rate": 7.755775577557756e-06,
      "loss": 0.1403,
      "step": 870
    },
    {
      "epoch": 2.6074074074074076,
      "grad_norm": 12.338042259216309,
      "learning_rate": 7.205720572057206e-06,
      "loss": 0.1746,
      "step": 880
    },
    {
      "epoch": 2.637037037037037,
      "grad_norm": 18.06306266784668,
      "learning_rate": 6.655665566556656e-06,
      "loss": 0.1506,
      "step": 890
    },
    {
      "epoch": 2.6666666666666665,
      "grad_norm": 8.664934158325195,
      "learning_rate": 6.105610561056106e-06,
      "loss": 0.1258,
      "step": 900
    },
    {
      "epoch": 2.696296296296296,
      "grad_norm": 6.592322826385498,
      "learning_rate": 5.555555555555556e-06,
      "loss": 0.1438,
      "step": 910
    },
    {
      "epoch": 2.725925925925926,
      "grad_norm": 4.439072608947754,
      "learning_rate": 5.005500550055006e-06,
      "loss": 0.1049,
      "step": 920
    },
    {
      "epoch": 2.7555555555555555,
      "grad_norm": 4.729221343994141,
      "learning_rate": 4.455445544554456e-06,
      "loss": 0.1445,
      "step": 930
    },
    {
      "epoch": 2.785185185185185,
      "grad_norm": 6.323641300201416,
      "learning_rate": 3.905390539053906e-06,
      "loss": 0.1269,
      "step": 940
    },
    {
      "epoch": 2.814814814814815,
      "grad_norm": 8.931975364685059,
      "learning_rate": 3.3553355335533555e-06,
      "loss": 0.1046,
      "step": 950
    },
    {
      "epoch": 2.8444444444444446,
      "grad_norm": 25.645519256591797,
      "learning_rate": 2.8052805280528055e-06,
      "loss": 0.166,
      "step": 960
    },
    {
      "epoch": 2.8740740740740742,
      "grad_norm": 5.7749128341674805,
      "learning_rate": 2.2552255225522554e-06,
      "loss": 0.1325,
      "step": 970
    },
    {
      "epoch": 2.9037037037037035,
      "grad_norm": 3.4846718311309814,
      "learning_rate": 1.7051705170517053e-06,
      "loss": 0.1116,
      "step": 980
    },
    {
      "epoch": 2.9333333333333336,
      "grad_norm": 9.06553840637207,
      "learning_rate": 1.155115511551155e-06,
      "loss": 0.1218,
      "step": 990
    },
    {
      "epoch": 2.962962962962963,
      "grad_norm": 4.044698238372803,
      "learning_rate": 6.050605060506051e-07,
      "loss": 0.0964,
      "step": 1000
    },
    {
      "epoch": 2.9925925925925925,
      "grad_norm": 13.73366928100586,
      "learning_rate": 5.5005500550055006e-08,
      "loss": 0.1188,
      "step": 1010
    },
    {
      "epoch": 2.9955555555555557,
      "eval_accuracy": 0.9829629629629629,
      "eval_loss": 0.05130303278565407,
      "eval_runtime": 32.1348,
      "eval_samples_per_second": 168.042,
      "eval_steps_per_second": 10.518,
      "step": 1011
    },
    {
      "epoch": 2.9955555555555557,
      "step": 1011,
      "total_flos": 1.608641425498964e+18,
      "train_loss": 0.3095316025258054,
      "train_runtime": 1092.5533,
      "train_samples_per_second": 59.311,
      "train_steps_per_second": 0.925
    }
  ],
  "logging_steps": 10,
  "max_steps": 1011,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.608641425498964e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}