{ "best_metric": 0.09541802108287811, "best_model_checkpoint": "autotrain-ai-image-detect-20250418-0133/checkpoint-11947", "epoch": 1.0, "eval_steps": 500, "global_step": 11947, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0020925755419770654, "grad_norm": 0.03698592260479927, "learning_rate": 3.9999567824173485e-05, "loss": 0.0423, "step": 25 }, { "epoch": 0.004185151083954131, "grad_norm": 8.344589233398438, "learning_rate": 3.999827131537151e-05, "loss": 0.0367, "step": 50 }, { "epoch": 0.006277726625931196, "grad_norm": 0.0733426958322525, "learning_rate": 3.999611052962606e-05, "loss": 0.0461, "step": 75 }, { "epoch": 0.008370302167908262, "grad_norm": 0.00953048188239336, "learning_rate": 3.999308556032107e-05, "loss": 0.0482, "step": 100 }, { "epoch": 0.010462877709885327, "grad_norm": 4.583376884460449, "learning_rate": 3.9989196538188406e-05, "loss": 0.0225, "step": 125 }, { "epoch": 0.012555453251862391, "grad_norm": 0.005306304432451725, "learning_rate": 3.9984443631302194e-05, "loss": 0.0391, "step": 150 }, { "epoch": 0.014648028793839458, "grad_norm": 13.573140144348145, "learning_rate": 3.99788270450716e-05, "loss": 0.108, "step": 175 }, { "epoch": 0.016740604335816523, "grad_norm": 4.633761405944824, "learning_rate": 3.997234702223188e-05, "loss": 0.1012, "step": 200 }, { "epoch": 0.01883317987779359, "grad_norm": 0.4037593603134155, "learning_rate": 3.996500384283397e-05, "loss": 0.0122, "step": 225 }, { "epoch": 0.020925755419770653, "grad_norm": 0.5655341148376465, "learning_rate": 3.995679782423232e-05, "loss": 0.0216, "step": 250 }, { "epoch": 0.02301833096174772, "grad_norm": 0.48408448696136475, "learning_rate": 3.9947729321071243e-05, "loss": 0.0208, "step": 275 }, { "epoch": 0.025110906503724783, "grad_norm": 0.5579467415809631, "learning_rate": 3.99377987252695e-05, "loss": 0.0292, "step": 300 }, { "epoch": 0.02720348204570185, "grad_norm": 6.891158103942871, "learning_rate": 3.992700646600344e-05, "loss": 0.0899, "step": 325 }, { "epoch": 0.029296057587678916, "grad_norm": 9.514092445373535, "learning_rate": 3.991535300968842e-05, "loss": 0.0542, "step": 350 }, { "epoch": 0.03138863312965598, "grad_norm": 5.77216911315918, "learning_rate": 3.990283885995866e-05, "loss": 0.0797, "step": 375 }, { "epoch": 0.033481208671633046, "grad_norm": 0.1123751625418663, "learning_rate": 3.988946455764545e-05, "loss": 0.0527, "step": 400 }, { "epoch": 0.03557378421361011, "grad_norm": 12.38884449005127, "learning_rate": 3.9875230680753816e-05, "loss": 0.0204, "step": 425 }, { "epoch": 0.03766635975558718, "grad_norm": 0.028740063309669495, "learning_rate": 3.98601378444375e-05, "loss": 0.0279, "step": 450 }, { "epoch": 0.03975893529756424, "grad_norm": 0.9096512794494629, "learning_rate": 3.984418670097241e-05, "loss": 0.0575, "step": 475 }, { "epoch": 0.041851510839541306, "grad_norm": 0.3465224802494049, "learning_rate": 3.98273779397284e-05, "loss": 0.0248, "step": 500 }, { "epoch": 0.04394408638151837, "grad_norm": 0.26555371284484863, "learning_rate": 3.98097122871395e-05, "loss": 0.0073, "step": 525 }, { "epoch": 0.04603666192349544, "grad_norm": 0.39954063296318054, "learning_rate": 3.9791190506672516e-05, "loss": 0.0297, "step": 550 }, { "epoch": 0.048129237465472506, "grad_norm": 0.08370428532361984, "learning_rate": 3.977181339879402e-05, "loss": 0.0357, "step": 575 }, { "epoch": 0.050221813007449566, "grad_norm": 0.010254177264869213, "learning_rate": 3.975158180093578e-05, "loss": 0.0209, "step": 600 }, { "epoch": 0.05231438854942663, "grad_norm": 0.012340884655714035, "learning_rate": 3.973049658745854e-05, "loss": 0.0755, "step": 625 }, { "epoch": 0.0544069640914037, "grad_norm": 0.42715218663215637, "learning_rate": 3.9708558669614266e-05, "loss": 0.0609, "step": 650 }, { "epoch": 0.056499539633380766, "grad_norm": 0.14249233901500702, "learning_rate": 3.968576899550673e-05, "loss": 0.0424, "step": 675 }, { "epoch": 0.05859211517535783, "grad_norm": 0.015084848739206791, "learning_rate": 3.966212855005055e-05, "loss": 0.0463, "step": 700 }, { "epoch": 0.06068469071733489, "grad_norm": 0.2930925488471985, "learning_rate": 3.963763835492865e-05, "loss": 0.0465, "step": 725 }, { "epoch": 0.06277726625931196, "grad_norm": 0.27187275886535645, "learning_rate": 3.961229946854804e-05, "loss": 0.0441, "step": 750 }, { "epoch": 0.06486984180128903, "grad_norm": 14.745759963989258, "learning_rate": 3.9586112985994154e-05, "loss": 0.0369, "step": 775 }, { "epoch": 0.06696241734326609, "grad_norm": 0.0668809786438942, "learning_rate": 3.955908003898345e-05, "loss": 0.0223, "step": 800 }, { "epoch": 0.06905499288524315, "grad_norm": 0.01884956657886505, "learning_rate": 3.953120179581457e-05, "loss": 0.0709, "step": 825 }, { "epoch": 0.07114756842722023, "grad_norm": 0.06137616187334061, "learning_rate": 3.950247946131777e-05, "loss": 0.0746, "step": 850 }, { "epoch": 0.07324014396919729, "grad_norm": 6.313259601593018, "learning_rate": 3.947291427680293e-05, "loss": 0.0722, "step": 875 }, { "epoch": 0.07533271951117436, "grad_norm": 0.09329789131879807, "learning_rate": 3.944250752000585e-05, "loss": 0.0475, "step": 900 }, { "epoch": 0.07742529505315142, "grad_norm": 0.4573472738265991, "learning_rate": 3.9411260505033065e-05, "loss": 0.0454, "step": 925 }, { "epoch": 0.07951787059512848, "grad_norm": 43.10087585449219, "learning_rate": 3.937917458230501e-05, "loss": 0.0217, "step": 950 }, { "epoch": 0.08161044613710555, "grad_norm": 0.033992718905210495, "learning_rate": 3.934625113849771e-05, "loss": 0.0336, "step": 975 }, { "epoch": 0.08370302167908261, "grad_norm": 18.495559692382812, "learning_rate": 3.9312491596482826e-05, "loss": 0.0366, "step": 1000 }, { "epoch": 0.08579559722105969, "grad_norm": 0.8050076961517334, "learning_rate": 3.9277897415266146e-05, "loss": 0.0519, "step": 1025 }, { "epoch": 0.08788817276303675, "grad_norm": 7.1265106201171875, "learning_rate": 3.924247008992456e-05, "loss": 0.0687, "step": 1050 }, { "epoch": 0.0899807483050138, "grad_norm": 0.0298299677670002, "learning_rate": 3.920621115154143e-05, "loss": 0.0591, "step": 1075 }, { "epoch": 0.09207332384699088, "grad_norm": 17.500030517578125, "learning_rate": 3.916912216714043e-05, "loss": 0.0513, "step": 1100 }, { "epoch": 0.09416589938896794, "grad_norm": 8.270520210266113, "learning_rate": 3.9131204739617785e-05, "loss": 0.0614, "step": 1125 }, { "epoch": 0.09625847493094501, "grad_norm": 0.8669015765190125, "learning_rate": 3.909246050767307e-05, "loss": 0.0556, "step": 1150 }, { "epoch": 0.09835105047292207, "grad_norm": 1.3969131708145142, "learning_rate": 3.905289114573833e-05, "loss": 0.0511, "step": 1175 }, { "epoch": 0.10044362601489913, "grad_norm": 0.07863520085811615, "learning_rate": 3.901249836390574e-05, "loss": 0.0445, "step": 1200 }, { "epoch": 0.1025362015568762, "grad_norm": 10.990960121154785, "learning_rate": 3.897128390785368e-05, "loss": 0.0525, "step": 1225 }, { "epoch": 0.10462877709885327, "grad_norm": 0.1187528669834137, "learning_rate": 3.892924955877131e-05, "loss": 0.0538, "step": 1250 }, { "epoch": 0.10672135264083034, "grad_norm": 0.329466313123703, "learning_rate": 3.888639713328159e-05, "loss": 0.0606, "step": 1275 }, { "epoch": 0.1088139281828074, "grad_norm": 0.12424540519714355, "learning_rate": 3.8842728483362756e-05, "loss": 0.0476, "step": 1300 }, { "epoch": 0.11090650372478446, "grad_norm": 0.8104080557823181, "learning_rate": 3.8798245496268297e-05, "loss": 0.0388, "step": 1325 }, { "epoch": 0.11299907926676153, "grad_norm": 0.26356926560401917, "learning_rate": 3.875295009444539e-05, "loss": 0.0435, "step": 1350 }, { "epoch": 0.11509165480873859, "grad_norm": 0.3834102749824524, "learning_rate": 3.8706844235451795e-05, "loss": 0.0295, "step": 1375 }, { "epoch": 0.11718423035071567, "grad_norm": 10.899662971496582, "learning_rate": 3.865992991187129e-05, "loss": 0.064, "step": 1400 }, { "epoch": 0.11927680589269272, "grad_norm": 0.03342663124203682, "learning_rate": 3.861220915122755e-05, "loss": 0.0311, "step": 1425 }, { "epoch": 0.12136938143466978, "grad_norm": 7.466312885284424, "learning_rate": 3.856368401589646e-05, "loss": 0.0135, "step": 1450 }, { "epoch": 0.12346195697664686, "grad_norm": 0.12502658367156982, "learning_rate": 3.8514356603017085e-05, "loss": 0.0299, "step": 1475 }, { "epoch": 0.12555453251862392, "grad_norm": 0.0441652350127697, "learning_rate": 3.846422904440096e-05, "loss": 0.0305, "step": 1500 }, { "epoch": 0.12764710806060098, "grad_norm": 8.360950469970703, "learning_rate": 3.8413303506440005e-05, "loss": 0.0612, "step": 1525 }, { "epoch": 0.12973968360257807, "grad_norm": 30.80620765686035, "learning_rate": 3.836158219001286e-05, "loss": 0.0324, "step": 1550 }, { "epoch": 0.13183225914455513, "grad_norm": 5.32338285446167, "learning_rate": 3.830906733038979e-05, "loss": 0.1051, "step": 1575 }, { "epoch": 0.13392483468653218, "grad_norm": 0.44424283504486084, "learning_rate": 3.8255761197136084e-05, "loss": 0.0862, "step": 1600 }, { "epoch": 0.13601741022850924, "grad_norm": 0.15658941864967346, "learning_rate": 3.820166609401396e-05, "loss": 0.0393, "step": 1625 }, { "epoch": 0.1381099857704863, "grad_norm": 0.022251861169934273, "learning_rate": 3.8146784358883e-05, "loss": 0.0592, "step": 1650 }, { "epoch": 0.1402025613124634, "grad_norm": 0.01628238894045353, "learning_rate": 3.8091118363599146e-05, "loss": 0.0539, "step": 1675 }, { "epoch": 0.14229513685444045, "grad_norm": 0.7614484429359436, "learning_rate": 3.8034670513912136e-05, "loss": 0.0852, "step": 1700 }, { "epoch": 0.1443877123964175, "grad_norm": 0.04743648320436478, "learning_rate": 3.797744324936159e-05, "loss": 0.0344, "step": 1725 }, { "epoch": 0.14648028793839457, "grad_norm": 10.843757629394531, "learning_rate": 3.7919439043171536e-05, "loss": 0.0516, "step": 1750 }, { "epoch": 0.14857286348037163, "grad_norm": 11.178771018981934, "learning_rate": 3.786066040214355e-05, "loss": 0.0657, "step": 1775 }, { "epoch": 0.15066543902234872, "grad_norm": 0.1333361119031906, "learning_rate": 3.780110986654841e-05, "loss": 0.0321, "step": 1800 }, { "epoch": 0.15275801456432578, "grad_norm": 8.613201141357422, "learning_rate": 3.774079001001631e-05, "loss": 0.0344, "step": 1825 }, { "epoch": 0.15485059010630284, "grad_norm": 17.0685977935791, "learning_rate": 3.7679703439425635e-05, "loss": 0.0392, "step": 1850 }, { "epoch": 0.1569431656482799, "grad_norm": 0.14994333684444427, "learning_rate": 3.76178527947903e-05, "loss": 0.0831, "step": 1875 }, { "epoch": 0.15903574119025696, "grad_norm": 0.13761335611343384, "learning_rate": 3.7555240749145644e-05, "loss": 0.0669, "step": 1900 }, { "epoch": 0.16112831673223404, "grad_norm": 0.015248261392116547, "learning_rate": 3.7491870008432935e-05, "loss": 0.038, "step": 1925 }, { "epoch": 0.1632208922742111, "grad_norm": 0.13382545113563538, "learning_rate": 3.7427743311382395e-05, "loss": 0.0298, "step": 1950 }, { "epoch": 0.16531346781618816, "grad_norm": 7.744543552398682, "learning_rate": 3.7362863429394855e-05, "loss": 0.0405, "step": 1975 }, { "epoch": 0.16740604335816522, "grad_norm": 0.06504332274198532, "learning_rate": 3.7297233166421974e-05, "loss": 0.0549, "step": 2000 }, { "epoch": 0.16949861890014228, "grad_norm": 0.023874608799815178, "learning_rate": 3.723085535884507e-05, "loss": 0.0328, "step": 2025 }, { "epoch": 0.17159119444211937, "grad_norm": 0.1030099168419838, "learning_rate": 3.716373287535252e-05, "loss": 0.0523, "step": 2050 }, { "epoch": 0.17368376998409643, "grad_norm": 12.099047660827637, "learning_rate": 3.709586861681581e-05, "loss": 0.0452, "step": 2075 }, { "epoch": 0.1757763455260735, "grad_norm": 1.0645301342010498, "learning_rate": 3.702726551616413e-05, "loss": 0.0376, "step": 2100 }, { "epoch": 0.17786892106805055, "grad_norm": 0.7780424356460571, "learning_rate": 3.6957926538257676e-05, "loss": 0.08, "step": 2125 }, { "epoch": 0.1799614966100276, "grad_norm": 0.07100946456193924, "learning_rate": 3.688785467975944e-05, "loss": 0.0382, "step": 2150 }, { "epoch": 0.1820540721520047, "grad_norm": 0.06628437340259552, "learning_rate": 3.681705296900576e-05, "loss": 0.0544, "step": 2175 }, { "epoch": 0.18414664769398176, "grad_norm": 13.79120922088623, "learning_rate": 3.674552446587543e-05, "loss": 0.08, "step": 2200 }, { "epoch": 0.18623922323595882, "grad_norm": 4.065523147583008, "learning_rate": 3.667327226165743e-05, "loss": 0.0284, "step": 2225 }, { "epoch": 0.18833179877793588, "grad_norm": 20.047752380371094, "learning_rate": 3.660029947891738e-05, "loss": 0.0808, "step": 2250 }, { "epoch": 0.19042437431991294, "grad_norm": 0.29820454120635986, "learning_rate": 3.652660927136256e-05, "loss": 0.0445, "step": 2275 }, { "epoch": 0.19251694986189002, "grad_norm": 0.5741401314735413, "learning_rate": 3.645220482370558e-05, "loss": 0.0782, "step": 2300 }, { "epoch": 0.19460952540386708, "grad_norm": 0.03809121996164322, "learning_rate": 3.637708935152683e-05, "loss": 0.0324, "step": 2325 }, { "epoch": 0.19670210094584414, "grad_norm": 0.1253562569618225, "learning_rate": 3.630126610113541e-05, "loss": 0.0573, "step": 2350 }, { "epoch": 0.1987946764878212, "grad_norm": 10.205113410949707, "learning_rate": 3.622473834942894e-05, "loss": 0.0449, "step": 2375 }, { "epoch": 0.20088725202979826, "grad_norm": 0.18986789882183075, "learning_rate": 3.6147509403751844e-05, "loss": 0.061, "step": 2400 }, { "epoch": 0.20297982757177535, "grad_norm": 0.2179853469133377, "learning_rate": 3.606958260175245e-05, "loss": 0.0501, "step": 2425 }, { "epoch": 0.2050724031137524, "grad_norm": 8.819538116455078, "learning_rate": 3.599096131123879e-05, "loss": 0.0484, "step": 2450 }, { "epoch": 0.20716497865572947, "grad_norm": 11.121920585632324, "learning_rate": 3.591164893003296e-05, "loss": 0.0516, "step": 2475 }, { "epoch": 0.20925755419770653, "grad_norm": 6.540257453918457, "learning_rate": 3.5831648885824375e-05, "loss": 0.0473, "step": 2500 }, { "epoch": 0.2113501297396836, "grad_norm": 0.03221260383725166, "learning_rate": 3.5750964636021535e-05, "loss": 0.0161, "step": 2525 }, { "epoch": 0.21344270528166068, "grad_norm": 0.006673712749034166, "learning_rate": 3.566959966760269e-05, "loss": 0.0543, "step": 2550 }, { "epoch": 0.21553528082363774, "grad_norm": 0.0166669562458992, "learning_rate": 3.5587557496965086e-05, "loss": 0.05, "step": 2575 }, { "epoch": 0.2176278563656148, "grad_norm": 0.056203585118055344, "learning_rate": 3.5504841669773e-05, "loss": 0.0318, "step": 2600 }, { "epoch": 0.21972043190759186, "grad_norm": 0.15732666850090027, "learning_rate": 3.542145576080454e-05, "loss": 0.0285, "step": 2625 }, { "epoch": 0.22181300744956892, "grad_norm": 0.6591875553131104, "learning_rate": 3.533740337379713e-05, "loss": 0.0366, "step": 2650 }, { "epoch": 0.223905582991546, "grad_norm": 110.1420669555664, "learning_rate": 3.5252688141291725e-05, "loss": 0.0378, "step": 2675 }, { "epoch": 0.22599815853352306, "grad_norm": 2.0431766510009766, "learning_rate": 3.516731372447592e-05, "loss": 0.0819, "step": 2700 }, { "epoch": 0.22809073407550012, "grad_norm": 25.620094299316406, "learning_rate": 3.5081283813025604e-05, "loss": 0.0576, "step": 2725 }, { "epoch": 0.23018330961747718, "grad_norm": 21.387821197509766, "learning_rate": 3.4994602124945604e-05, "loss": 0.08, "step": 2750 }, { "epoch": 0.23227588515945424, "grad_norm": 3.099421262741089, "learning_rate": 3.490727240640893e-05, "loss": 0.0561, "step": 2775 }, { "epoch": 0.23436846070143133, "grad_norm": 8.681183815002441, "learning_rate": 3.481929843159491e-05, "loss": 0.062, "step": 2800 }, { "epoch": 0.2364610362434084, "grad_norm": 0.09572477638721466, "learning_rate": 3.473068400252607e-05, "loss": 0.0503, "step": 2825 }, { "epoch": 0.23855361178538545, "grad_norm": 7.184039115905762, "learning_rate": 3.464143294890383e-05, "loss": 0.0848, "step": 2850 }, { "epoch": 0.2406461873273625, "grad_norm": 5.73546838760376, "learning_rate": 3.455154912794297e-05, "loss": 0.0455, "step": 2875 }, { "epoch": 0.24273876286933957, "grad_norm": 0.046288516372442245, "learning_rate": 3.4461036424204955e-05, "loss": 0.0306, "step": 2900 }, { "epoch": 0.24483133841131666, "grad_norm": 0.867224395275116, "learning_rate": 3.4369898749430044e-05, "loss": 0.0382, "step": 2925 }, { "epoch": 0.24692391395329372, "grad_norm": 0.01112967636436224, "learning_rate": 3.427814004236822e-05, "loss": 0.0927, "step": 2950 }, { "epoch": 0.24901648949527078, "grad_norm": 0.26373812556266785, "learning_rate": 3.4185764268608996e-05, "loss": 0.0338, "step": 2975 }, { "epoch": 0.25110906503724784, "grad_norm": 9.9061279296875, "learning_rate": 3.409277542041001e-05, "loss": 0.0344, "step": 3000 }, { "epoch": 0.2532016405792249, "grad_norm": 0.27759966254234314, "learning_rate": 3.399917751652448e-05, "loss": 0.0332, "step": 3025 }, { "epoch": 0.25529421612120196, "grad_norm": 0.04884539917111397, "learning_rate": 3.390497460202758e-05, "loss": 0.0848, "step": 3050 }, { "epoch": 0.257386791663179, "grad_norm": 4.758861541748047, "learning_rate": 3.3810170748141534e-05, "loss": 0.0408, "step": 3075 }, { "epoch": 0.25947936720515613, "grad_norm": 0.06590425223112106, "learning_rate": 3.371477005205974e-05, "loss": 0.0334, "step": 3100 }, { "epoch": 0.2615719427471332, "grad_norm": 0.018905460834503174, "learning_rate": 3.361877663676966e-05, "loss": 0.0278, "step": 3125 }, { "epoch": 0.26366451828911025, "grad_norm": 0.08253883570432663, "learning_rate": 3.352219465087466e-05, "loss": 0.0406, "step": 3150 }, { "epoch": 0.2657570938310873, "grad_norm": 23.68890380859375, "learning_rate": 3.34250282684147e-05, "loss": 0.0697, "step": 3175 }, { "epoch": 0.26784966937306437, "grad_norm": 0.23610574007034302, "learning_rate": 3.332728168868593e-05, "loss": 0.0315, "step": 3200 }, { "epoch": 0.26994224491504143, "grad_norm": 1.0449060201644897, "learning_rate": 3.322895913605925e-05, "loss": 0.0252, "step": 3225 }, { "epoch": 0.2720348204570185, "grad_norm": 26.833650588989258, "learning_rate": 3.313006485979772e-05, "loss": 0.0832, "step": 3250 }, { "epoch": 0.27412739599899555, "grad_norm": 0.09942333400249481, "learning_rate": 3.303060313387286e-05, "loss": 0.0174, "step": 3275 }, { "epoch": 0.2762199715409726, "grad_norm": 0.023540310561656952, "learning_rate": 3.293057825678007e-05, "loss": 0.0957, "step": 3300 }, { "epoch": 0.27831254708294967, "grad_norm": 0.07260198891162872, "learning_rate": 3.282999455135272e-05, "loss": 0.0699, "step": 3325 }, { "epoch": 0.2804051226249268, "grad_norm": 20.466096878051758, "learning_rate": 3.272885636457541e-05, "loss": 0.069, "step": 3350 }, { "epoch": 0.28249769816690384, "grad_norm": 0.5947694778442383, "learning_rate": 3.2627168067396105e-05, "loss": 0.0471, "step": 3375 }, { "epoch": 0.2845902737088809, "grad_norm": 0.021415280178189278, "learning_rate": 3.252493405453718e-05, "loss": 0.0423, "step": 3400 }, { "epoch": 0.28668284925085796, "grad_norm": 0.10097396373748779, "learning_rate": 3.242215874430554e-05, "loss": 0.0285, "step": 3425 }, { "epoch": 0.288775424792835, "grad_norm": 16.974971771240234, "learning_rate": 3.231884657840165e-05, "loss": 0.0559, "step": 3450 }, { "epoch": 0.2908680003348121, "grad_norm": 7.112433433532715, "learning_rate": 3.221500202172757e-05, "loss": 0.0499, "step": 3475 }, { "epoch": 0.29296057587678914, "grad_norm": 0.02784404344856739, "learning_rate": 3.2110629562194026e-05, "loss": 0.0308, "step": 3500 }, { "epoch": 0.2950531514187662, "grad_norm": 0.3016861379146576, "learning_rate": 3.20057337105264e-05, "loss": 0.0636, "step": 3525 }, { "epoch": 0.29714572696074326, "grad_norm": 16.54718780517578, "learning_rate": 3.190031900006984e-05, "loss": 0.0791, "step": 3550 }, { "epoch": 0.2992383025027203, "grad_norm": 4.460518836975098, "learning_rate": 3.1794389986593306e-05, "loss": 0.0682, "step": 3575 }, { "epoch": 0.30133087804469744, "grad_norm": 0.12846969068050385, "learning_rate": 3.168795124809269e-05, "loss": 0.0428, "step": 3600 }, { "epoch": 0.3034234535866745, "grad_norm": 32.92333221435547, "learning_rate": 3.158100738459298e-05, "loss": 0.0472, "step": 3625 }, { "epoch": 0.30551602912865156, "grad_norm": 7.472573757171631, "learning_rate": 3.147356301794943e-05, "loss": 0.0663, "step": 3650 }, { "epoch": 0.3076086046706286, "grad_norm": 0.17467910051345825, "learning_rate": 3.136562279164784e-05, "loss": 0.0449, "step": 3675 }, { "epoch": 0.3097011802126057, "grad_norm": 0.05833444744348526, "learning_rate": 3.125719137060384e-05, "loss": 0.0311, "step": 3700 }, { "epoch": 0.31179375575458274, "grad_norm": 0.027352772653102875, "learning_rate": 3.114827344096136e-05, "loss": 0.0474, "step": 3725 }, { "epoch": 0.3138863312965598, "grad_norm": 29.073902130126953, "learning_rate": 3.103887370989002e-05, "loss": 0.0631, "step": 3750 }, { "epoch": 0.31597890683853685, "grad_norm": 6.369044303894043, "learning_rate": 3.092899690538173e-05, "loss": 0.0718, "step": 3775 }, { "epoch": 0.3180714823805139, "grad_norm": 0.3031097948551178, "learning_rate": 3.081864777604637e-05, "loss": 0.0338, "step": 3800 }, { "epoch": 0.320164057922491, "grad_norm": 0.14146162569522858, "learning_rate": 3.070783109090657e-05, "loss": 0.0462, "step": 3825 }, { "epoch": 0.3222566334644681, "grad_norm": 7.264471054077148, "learning_rate": 3.0596551639191564e-05, "loss": 0.0507, "step": 3850 }, { "epoch": 0.32434920900644515, "grad_norm": 5.5302205085754395, "learning_rate": 3.0484814230130264e-05, "loss": 0.0594, "step": 3875 }, { "epoch": 0.3264417845484222, "grad_norm": 0.14829619228839874, "learning_rate": 3.0372623692743386e-05, "loss": 0.0154, "step": 3900 }, { "epoch": 0.32853436009039927, "grad_norm": 0.028743617236614227, "learning_rate": 3.0259984875634737e-05, "loss": 0.0758, "step": 3925 }, { "epoch": 0.33062693563237633, "grad_norm": 33.016883850097656, "learning_rate": 3.014690264678172e-05, "loss": 0.0608, "step": 3950 }, { "epoch": 0.3327195111743534, "grad_norm": 8.965993881225586, "learning_rate": 3.00333818933249e-05, "loss": 0.036, "step": 3975 }, { "epoch": 0.33481208671633045, "grad_norm": 0.054518092423677444, "learning_rate": 2.9919427521356827e-05, "loss": 0.0458, "step": 4000 }, { "epoch": 0.3369046622583075, "grad_norm": 0.04685276001691818, "learning_rate": 2.980504445570998e-05, "loss": 0.0587, "step": 4025 }, { "epoch": 0.33899723780028457, "grad_norm": 22.658573150634766, "learning_rate": 2.9690237639743965e-05, "loss": 0.0122, "step": 4050 }, { "epoch": 0.3410898133422617, "grad_norm": 0.01873711682856083, "learning_rate": 2.9575012035131823e-05, "loss": 0.047, "step": 4075 }, { "epoch": 0.34318238888423874, "grad_norm": 0.04110574722290039, "learning_rate": 2.9459372621645667e-05, "loss": 0.0196, "step": 4100 }, { "epoch": 0.3452749644262158, "grad_norm": 0.44694823026657104, "learning_rate": 2.934332439694139e-05, "loss": 0.0772, "step": 4125 }, { "epoch": 0.34736753996819286, "grad_norm": 5.95962381362915, "learning_rate": 2.9226872376342745e-05, "loss": 0.0203, "step": 4150 }, { "epoch": 0.3494601155101699, "grad_norm": 0.111228346824646, "learning_rate": 2.9110021592624553e-05, "loss": 0.068, "step": 4175 }, { "epoch": 0.351552691052147, "grad_norm": 0.08670425415039062, "learning_rate": 2.899277709579522e-05, "loss": 0.0349, "step": 4200 }, { "epoch": 0.35364526659412404, "grad_norm": 7.51801872253418, "learning_rate": 2.8875143952878474e-05, "loss": 0.0656, "step": 4225 }, { "epoch": 0.3557378421361011, "grad_norm": 0.4301106333732605, "learning_rate": 2.875712724769439e-05, "loss": 0.0337, "step": 4250 }, { "epoch": 0.35783041767807816, "grad_norm": 0.04058774933218956, "learning_rate": 2.863873208063969e-05, "loss": 0.0633, "step": 4275 }, { "epoch": 0.3599229932200552, "grad_norm": 0.29651474952697754, "learning_rate": 2.8519963568467277e-05, "loss": 0.0156, "step": 4300 }, { "epoch": 0.36201556876203234, "grad_norm": 0.7200632691383362, "learning_rate": 2.8400826844065146e-05, "loss": 0.0601, "step": 4325 }, { "epoch": 0.3641081443040094, "grad_norm": 0.13928024470806122, "learning_rate": 2.828132705623454e-05, "loss": 0.0505, "step": 4350 }, { "epoch": 0.36620071984598646, "grad_norm": 2.0252275466918945, "learning_rate": 2.8161469369467413e-05, "loss": 0.0213, "step": 4375 }, { "epoch": 0.3682932953879635, "grad_norm": 0.16088655591011047, "learning_rate": 2.8041258963723246e-05, "loss": 0.0167, "step": 4400 }, { "epoch": 0.3703858709299406, "grad_norm": 15.17540454864502, "learning_rate": 2.7920701034205184e-05, "loss": 0.0874, "step": 4425 }, { "epoch": 0.37247844647191763, "grad_norm": 6.356795787811279, "learning_rate": 2.7799800791135513e-05, "loss": 0.0839, "step": 4450 }, { "epoch": 0.3745710220138947, "grad_norm": 0.16258519887924194, "learning_rate": 2.767856345953048e-05, "loss": 0.0287, "step": 4475 }, { "epoch": 0.37666359755587175, "grad_norm": 0.0377529114484787, "learning_rate": 2.7556994278974483e-05, "loss": 0.0285, "step": 4500 }, { "epoch": 0.3787561730978488, "grad_norm": 0.036431267857551575, "learning_rate": 2.743509850339363e-05, "loss": 0.0326, "step": 4525 }, { "epoch": 0.3808487486398259, "grad_norm": 2.534477710723877, "learning_rate": 2.7312881400828678e-05, "loss": 0.0373, "step": 4550 }, { "epoch": 0.382941324181803, "grad_norm": 0.5988636612892151, "learning_rate": 2.719034825320736e-05, "loss": 0.0544, "step": 4575 }, { "epoch": 0.38503389972378005, "grad_norm": 15.286279678344727, "learning_rate": 2.706750435611611e-05, "loss": 0.0494, "step": 4600 }, { "epoch": 0.3871264752657571, "grad_norm": 0.05748358368873596, "learning_rate": 2.694435501857121e-05, "loss": 0.0355, "step": 4625 }, { "epoch": 0.38921905080773417, "grad_norm": 0.20348195731639862, "learning_rate": 2.682090556278932e-05, "loss": 0.0311, "step": 4650 }, { "epoch": 0.39131162634971123, "grad_norm": 0.05684299394488335, "learning_rate": 2.669716132395751e-05, "loss": 0.0366, "step": 4675 }, { "epoch": 0.3934042018916883, "grad_norm": 0.6135233044624329, "learning_rate": 2.6573127650002647e-05, "loss": 0.0439, "step": 4700 }, { "epoch": 0.39549677743366535, "grad_norm": 0.07451584190130234, "learning_rate": 2.644880990136029e-05, "loss": 0.0221, "step": 4725 }, { "epoch": 0.3975893529756424, "grad_norm": 0.18352004885673523, "learning_rate": 2.632421345074301e-05, "loss": 0.014, "step": 4750 }, { "epoch": 0.39968192851761947, "grad_norm": 15.234163284301758, "learning_rate": 2.619934368290821e-05, "loss": 0.0506, "step": 4775 }, { "epoch": 0.4017745040595965, "grad_norm": 0.15898753702640533, "learning_rate": 2.60742059944254e-05, "loss": 0.0339, "step": 4800 }, { "epoch": 0.40386707960157364, "grad_norm": 10.560185432434082, "learning_rate": 2.594880579344298e-05, "loss": 0.0681, "step": 4825 }, { "epoch": 0.4059596551435507, "grad_norm": 0.027397403493523598, "learning_rate": 2.5823148499454503e-05, "loss": 0.0387, "step": 4850 }, { "epoch": 0.40805223068552776, "grad_norm": 0.042963869869709015, "learning_rate": 2.5697239543064453e-05, "loss": 0.0575, "step": 4875 }, { "epoch": 0.4101448062275048, "grad_norm": 0.041683413088321686, "learning_rate": 2.557108436575356e-05, "loss": 0.0586, "step": 4900 }, { "epoch": 0.4122373817694819, "grad_norm": 0.016998037695884705, "learning_rate": 2.544468841964363e-05, "loss": 0.0532, "step": 4925 }, { "epoch": 0.41432995731145894, "grad_norm": 0.28552761673927307, "learning_rate": 2.531805716726191e-05, "loss": 0.0724, "step": 4950 }, { "epoch": 0.416422532853436, "grad_norm": 11.638047218322754, "learning_rate": 2.5191196081305008e-05, "loss": 0.0705, "step": 4975 }, { "epoch": 0.41851510839541306, "grad_norm": 0.08826717734336853, "learning_rate": 2.50641106444024e-05, "loss": 0.0216, "step": 5000 }, { "epoch": 0.4206076839373901, "grad_norm": 0.15376928448677063, "learning_rate": 2.493680634887946e-05, "loss": 0.0349, "step": 5025 }, { "epoch": 0.4227002594793672, "grad_norm": 0.09139838069677353, "learning_rate": 2.48092886965201e-05, "loss": 0.0422, "step": 5050 }, { "epoch": 0.4247928350213443, "grad_norm": 0.6814232468605042, "learning_rate": 2.4681563198328993e-05, "loss": 0.0522, "step": 5075 }, { "epoch": 0.42688541056332135, "grad_norm": 0.7719263434410095, "learning_rate": 2.4553635374293428e-05, "loss": 0.0218, "step": 5100 }, { "epoch": 0.4289779861052984, "grad_norm": 4.256403923034668, "learning_rate": 2.4425510753144697e-05, "loss": 0.0255, "step": 5125 }, { "epoch": 0.4310705616472755, "grad_norm": 16.040773391723633, "learning_rate": 2.429719487211922e-05, "loss": 0.0389, "step": 5150 }, { "epoch": 0.43316313718925253, "grad_norm": 0.01401565782725811, "learning_rate": 2.4168693276719184e-05, "loss": 0.0672, "step": 5175 }, { "epoch": 0.4352557127312296, "grad_norm": 3.7595765590667725, "learning_rate": 2.404001152047291e-05, "loss": 0.0497, "step": 5200 }, { "epoch": 0.43734828827320665, "grad_norm": 0.5674498081207275, "learning_rate": 2.391115516469483e-05, "loss": 0.025, "step": 5225 }, { "epoch": 0.4394408638151837, "grad_norm": 3.231126308441162, "learning_rate": 2.3782129778245158e-05, "loss": 0.0363, "step": 5250 }, { "epoch": 0.4415334393571608, "grad_norm": 0.01875438168644905, "learning_rate": 2.365294093728919e-05, "loss": 0.0503, "step": 5275 }, { "epoch": 0.44362601489913783, "grad_norm": 0.7183122634887695, "learning_rate": 2.3523594225056342e-05, "loss": 0.0562, "step": 5300 }, { "epoch": 0.44571859044111495, "grad_norm": 25.533050537109375, "learning_rate": 2.3394095231598835e-05, "loss": 0.0704, "step": 5325 }, { "epoch": 0.447811165983092, "grad_norm": 0.07413307577371597, "learning_rate": 2.326444955355013e-05, "loss": 0.0144, "step": 5350 }, { "epoch": 0.44990374152506907, "grad_norm": 0.1055075004696846, "learning_rate": 2.3134662793883032e-05, "loss": 0.1008, "step": 5375 }, { "epoch": 0.4519963170670461, "grad_norm": 0.017579292878508568, "learning_rate": 2.3004740561667554e-05, "loss": 0.0628, "step": 5400 }, { "epoch": 0.4540888926090232, "grad_norm": 8.102964401245117, "learning_rate": 2.28746884718285e-05, "loss": 0.0642, "step": 5425 }, { "epoch": 0.45618146815100025, "grad_norm": 10.838987350463867, "learning_rate": 2.2744512144902816e-05, "loss": 0.0271, "step": 5450 }, { "epoch": 0.4582740436929773, "grad_norm": 13.289299964904785, "learning_rate": 2.2614217206796668e-05, "loss": 0.0474, "step": 5475 }, { "epoch": 0.46036661923495437, "grad_norm": 49.52151107788086, "learning_rate": 2.2483809288542317e-05, "loss": 0.0635, "step": 5500 }, { "epoch": 0.4624591947769314, "grad_norm": 0.045649878680706024, "learning_rate": 2.2353294026054745e-05, "loss": 0.0723, "step": 5525 }, { "epoch": 0.4645517703189085, "grad_norm": 19.275131225585938, "learning_rate": 2.22226770598881e-05, "loss": 0.0483, "step": 5550 }, { "epoch": 0.4666443458608856, "grad_norm": 0.4754731357097626, "learning_rate": 2.2091964034991903e-05, "loss": 0.0602, "step": 5575 }, { "epoch": 0.46873692140286266, "grad_norm": 0.023405829444527626, "learning_rate": 2.196116060046712e-05, "loss": 0.0669, "step": 5600 }, { "epoch": 0.4708294969448397, "grad_norm": 0.06915944814682007, "learning_rate": 2.1830272409321997e-05, "loss": 0.0604, "step": 5625 }, { "epoch": 0.4729220724868168, "grad_norm": 0.10562706738710403, "learning_rate": 2.1699305118227745e-05, "loss": 0.0737, "step": 5650 }, { "epoch": 0.47501464802879384, "grad_norm": 0.24281857907772064, "learning_rate": 2.1568264387274095e-05, "loss": 0.0368, "step": 5675 }, { "epoch": 0.4771072235707709, "grad_norm": 5.6087517738342285, "learning_rate": 2.1437155879724664e-05, "loss": 0.111, "step": 5700 }, { "epoch": 0.47919979911274796, "grad_norm": 7.008552551269531, "learning_rate": 2.130598526177222e-05, "loss": 0.0397, "step": 5725 }, { "epoch": 0.481292374654725, "grad_norm": 0.5012567043304443, "learning_rate": 2.1174758202293783e-05, "loss": 0.0257, "step": 5750 }, { "epoch": 0.4833849501967021, "grad_norm": 0.17054887115955353, "learning_rate": 2.104348037260564e-05, "loss": 0.0635, "step": 5775 }, { "epoch": 0.48547752573867914, "grad_norm": 18.033811569213867, "learning_rate": 2.091215744621825e-05, "loss": 0.075, "step": 5800 }, { "epoch": 0.48757010128065625, "grad_norm": 6.623977184295654, "learning_rate": 2.078079509859103e-05, "loss": 0.0448, "step": 5825 }, { "epoch": 0.4896626768226333, "grad_norm": 12.964604377746582, "learning_rate": 2.0649399006887106e-05, "loss": 0.0385, "step": 5850 }, { "epoch": 0.4917552523646104, "grad_norm": 19.55506706237793, "learning_rate": 2.051797484972793e-05, "loss": 0.0614, "step": 5875 }, { "epoch": 0.49384782790658743, "grad_norm": 0.6688134670257568, "learning_rate": 2.0386528306947873e-05, "loss": 0.0567, "step": 5900 }, { "epoch": 0.4959404034485645, "grad_norm": 0.5797390341758728, "learning_rate": 2.025506505934876e-05, "loss": 0.0378, "step": 5925 }, { "epoch": 0.49803297899054155, "grad_norm": 5.049008369445801, "learning_rate": 2.0123590788454365e-05, "loss": 0.0649, "step": 5950 }, { "epoch": 0.5001255545325186, "grad_norm": 0.4083123803138733, "learning_rate": 1.9992111176264857e-05, "loss": 0.0221, "step": 5975 }, { "epoch": 0.5022181300744957, "grad_norm": 0.05658812075853348, "learning_rate": 1.986063190501124e-05, "loss": 0.0804, "step": 6000 }, { "epoch": 0.5043107056164727, "grad_norm": 0.026742545887827873, "learning_rate": 1.9729158656909784e-05, "loss": 0.042, "step": 6025 }, { "epoch": 0.5064032811584498, "grad_norm": 1.0423567295074463, "learning_rate": 1.9597697113916464e-05, "loss": 0.0527, "step": 6050 }, { "epoch": 0.5084958567004269, "grad_norm": 8.540645599365234, "learning_rate": 1.946625295748137e-05, "loss": 0.0589, "step": 6075 }, { "epoch": 0.5105884322424039, "grad_norm": 0.3264181315898895, "learning_rate": 1.9334831868303206e-05, "loss": 0.0438, "step": 6100 }, { "epoch": 0.512681007784381, "grad_norm": 0.10669530928134918, "learning_rate": 1.9203439526083747e-05, "loss": 0.0269, "step": 6125 }, { "epoch": 0.514773583326358, "grad_norm": 0.03823905065655708, "learning_rate": 1.9072081609282408e-05, "loss": 0.0704, "step": 6150 }, { "epoch": 0.5168661588683352, "grad_norm": 0.04673992842435837, "learning_rate": 1.894076379487081e-05, "loss": 0.0345, "step": 6175 }, { "epoch": 0.5189587344103123, "grad_norm": 0.10730260610580444, "learning_rate": 1.8809491758087463e-05, "loss": 0.0485, "step": 6200 }, { "epoch": 0.5210513099522893, "grad_norm": 0.4243800640106201, "learning_rate": 1.8678271172192458e-05, "loss": 0.0482, "step": 6225 }, { "epoch": 0.5231438854942664, "grad_norm": 0.14486907422542572, "learning_rate": 1.854710770822231e-05, "loss": 0.0314, "step": 6250 }, { "epoch": 0.5252364610362434, "grad_norm": 0.09311558306217194, "learning_rate": 1.841600703474487e-05, "loss": 0.0412, "step": 6275 }, { "epoch": 0.5273290365782205, "grad_norm": 0.009483098983764648, "learning_rate": 1.828497481761432e-05, "loss": 0.0321, "step": 6300 }, { "epoch": 0.5294216121201976, "grad_norm": 0.03347919136285782, "learning_rate": 1.8154016719726345e-05, "loss": 0.0594, "step": 6325 }, { "epoch": 0.5315141876621746, "grad_norm": 7.508219242095947, "learning_rate": 1.8023138400773362e-05, "loss": 0.057, "step": 6350 }, { "epoch": 0.5336067632041517, "grad_norm": 0.15084685385227203, "learning_rate": 1.7892345516999934e-05, "loss": 0.05, "step": 6375 }, { "epoch": 0.5356993387461287, "grad_norm": 0.13771960139274597, "learning_rate": 1.7761643720958324e-05, "loss": 0.029, "step": 6400 }, { "epoch": 0.5377919142881058, "grad_norm": 0.024205004796385765, "learning_rate": 1.7631038661264214e-05, "loss": 0.0555, "step": 6425 }, { "epoch": 0.5398844898300829, "grad_norm": 0.03557073697447777, "learning_rate": 1.750053598235256e-05, "loss": 0.0293, "step": 6450 }, { "epoch": 0.5419770653720599, "grad_norm": 0.07501186430454254, "learning_rate": 1.7370141324233672e-05, "loss": 0.0283, "step": 6475 }, { "epoch": 0.544069640914037, "grad_norm": 0.033643439412117004, "learning_rate": 1.723986032224947e-05, "loss": 0.0087, "step": 6500 }, { "epoch": 0.546162216456014, "grad_norm": 0.08923541009426117, "learning_rate": 1.7109698606829926e-05, "loss": 0.0417, "step": 6525 }, { "epoch": 0.5482547919979911, "grad_norm": 0.1683102399110794, "learning_rate": 1.6979661803249726e-05, "loss": 0.0081, "step": 6550 }, { "epoch": 0.5503473675399682, "grad_norm": 0.03645162284374237, "learning_rate": 1.6849755531385184e-05, "loss": 0.0286, "step": 6575 }, { "epoch": 0.5524399430819452, "grad_norm": 6.731907844543457, "learning_rate": 1.671998540547134e-05, "loss": 0.0511, "step": 6600 }, { "epoch": 0.5545325186239223, "grad_norm": 0.34839117527008057, "learning_rate": 1.6590357033859334e-05, "loss": 0.0529, "step": 6625 }, { "epoch": 0.5566250941658993, "grad_norm": 0.04825979471206665, "learning_rate": 1.6460876018774033e-05, "loss": 0.0478, "step": 6650 }, { "epoch": 0.5587176697078765, "grad_norm": 0.15074151754379272, "learning_rate": 1.63315479560719e-05, "loss": 0.0403, "step": 6675 }, { "epoch": 0.5608102452498536, "grad_norm": 0.8657410144805908, "learning_rate": 1.6202378434999194e-05, "loss": 0.0297, "step": 6700 }, { "epoch": 0.5629028207918306, "grad_norm": 25.035709381103516, "learning_rate": 1.607337303795035e-05, "loss": 0.0258, "step": 6725 }, { "epoch": 0.5649953963338077, "grad_norm": 5.161378860473633, "learning_rate": 1.594453734022678e-05, "loss": 0.0357, "step": 6750 }, { "epoch": 0.5670879718757847, "grad_norm": 15.704187393188477, "learning_rate": 1.5815876909795907e-05, "loss": 0.0589, "step": 6775 }, { "epoch": 0.5691805474177618, "grad_norm": 0.023194925859570503, "learning_rate": 1.568739730705051e-05, "loss": 0.0335, "step": 6800 }, { "epoch": 0.5712731229597389, "grad_norm": 54.290287017822266, "learning_rate": 1.5559104084568443e-05, "loss": 0.0282, "step": 6825 }, { "epoch": 0.5733656985017159, "grad_norm": 0.055701714009046555, "learning_rate": 1.543100278687265e-05, "loss": 0.0264, "step": 6850 }, { "epoch": 0.575458274043693, "grad_norm": 5.854345321655273, "learning_rate": 1.5303098950191556e-05, "loss": 0.0544, "step": 6875 }, { "epoch": 0.57755084958567, "grad_norm": 0.003441957989707589, "learning_rate": 1.5175398102219787e-05, "loss": 0.0356, "step": 6900 }, { "epoch": 0.5796434251276471, "grad_norm": 0.08189363032579422, "learning_rate": 1.5047905761879303e-05, "loss": 0.0494, "step": 6925 }, { "epoch": 0.5817360006696242, "grad_norm": 7.302794456481934, "learning_rate": 1.4920627439080858e-05, "loss": 0.0255, "step": 6950 }, { "epoch": 0.5838285762116012, "grad_norm": 14.938457489013672, "learning_rate": 1.4793568634485885e-05, "loss": 0.0662, "step": 6975 }, { "epoch": 0.5859211517535783, "grad_norm": 0.264533668756485, "learning_rate": 1.4666734839268773e-05, "loss": 0.0719, "step": 7000 }, { "epoch": 0.5880137272955553, "grad_norm": 1.2785422801971436, "learning_rate": 1.454013153487955e-05, "loss": 0.0459, "step": 7025 }, { "epoch": 0.5901063028375324, "grad_norm": 0.13779093325138092, "learning_rate": 1.4413764192806993e-05, "loss": 0.0313, "step": 7050 }, { "epoch": 0.5921988783795095, "grad_norm": 1.123705506324768, "learning_rate": 1.4287638274342148e-05, "loss": 0.0703, "step": 7075 }, { "epoch": 0.5942914539214865, "grad_norm": 3.7024033069610596, "learning_rate": 1.4161759230342321e-05, "loss": 0.0347, "step": 7100 }, { "epoch": 0.5963840294634636, "grad_norm": 4.496721267700195, "learning_rate": 1.40361325009955e-05, "loss": 0.0591, "step": 7125 }, { "epoch": 0.5984766050054406, "grad_norm": 10.648604393005371, "learning_rate": 1.3910763515585244e-05, "loss": 0.0486, "step": 7150 }, { "epoch": 0.6005691805474178, "grad_norm": 5.158627033233643, "learning_rate": 1.3785657692256033e-05, "loss": 0.0699, "step": 7175 }, { "epoch": 0.6026617560893949, "grad_norm": 0.5422956347465515, "learning_rate": 1.3660820437779148e-05, "loss": 0.0637, "step": 7200 }, { "epoch": 0.6047543316313719, "grad_norm": 0.2532649040222168, "learning_rate": 1.3536257147318938e-05, "loss": 0.0426, "step": 7225 }, { "epoch": 0.606846907173349, "grad_norm": 5.536031246185303, "learning_rate": 1.341197320419971e-05, "loss": 0.0545, "step": 7250 }, { "epoch": 0.608939482715326, "grad_norm": 0.14337138831615448, "learning_rate": 1.3287973979673043e-05, "loss": 0.0293, "step": 7275 }, { "epoch": 0.6110320582573031, "grad_norm": 0.029332490637898445, "learning_rate": 1.3164264832685673e-05, "loss": 0.0628, "step": 7300 }, { "epoch": 0.6131246337992802, "grad_norm": 0.3548976480960846, "learning_rate": 1.3040851109647892e-05, "loss": 0.0532, "step": 7325 }, { "epoch": 0.6152172093412572, "grad_norm": 0.19546709954738617, "learning_rate": 1.2917738144202466e-05, "loss": 0.0821, "step": 7350 }, { "epoch": 0.6173097848832343, "grad_norm": 1.9515585899353027, "learning_rate": 1.2794931256994155e-05, "loss": 0.05, "step": 7375 }, { "epoch": 0.6194023604252114, "grad_norm": 0.4851491451263428, "learning_rate": 1.2672435755439761e-05, "loss": 0.0314, "step": 7400 }, { "epoch": 0.6214949359671884, "grad_norm": 1.249005913734436, "learning_rate": 1.2550256933498745e-05, "loss": 0.0552, "step": 7425 }, { "epoch": 0.6235875115091655, "grad_norm": 7.1881890296936035, "learning_rate": 1.2428400071444447e-05, "loss": 0.036, "step": 7450 }, { "epoch": 0.6256800870511425, "grad_norm": 0.07499822229146957, "learning_rate": 1.2306870435635867e-05, "loss": 0.0338, "step": 7475 }, { "epoch": 0.6277726625931196, "grad_norm": 0.27988529205322266, "learning_rate": 1.2185673278290093e-05, "loss": 0.0243, "step": 7500 }, { "epoch": 0.6298652381350967, "grad_norm": 21.607757568359375, "learning_rate": 1.2064813837255284e-05, "loss": 0.0693, "step": 7525 }, { "epoch": 0.6319578136770737, "grad_norm": 0.4316962957382202, "learning_rate": 1.1944297335784327e-05, "loss": 0.0602, "step": 7550 }, { "epoch": 0.6340503892190508, "grad_norm": 5.633823871612549, "learning_rate": 1.1824128982309085e-05, "loss": 0.0552, "step": 7575 }, { "epoch": 0.6361429647610278, "grad_norm": 0.07708461582660675, "learning_rate": 1.1704313970215302e-05, "loss": 0.0323, "step": 7600 }, { "epoch": 0.6382355403030049, "grad_norm": 0.06260835379362106, "learning_rate": 1.1584857477618168e-05, "loss": 0.0312, "step": 7625 }, { "epoch": 0.640328115844982, "grad_norm": 0.11298274248838425, "learning_rate": 1.146576466713852e-05, "loss": 0.0551, "step": 7650 }, { "epoch": 0.6424206913869591, "grad_norm": 5.66866397857666, "learning_rate": 1.1347040685679747e-05, "loss": 0.0382, "step": 7675 }, { "epoch": 0.6445132669289362, "grad_norm": 1.05096435546875, "learning_rate": 1.1228690664205335e-05, "loss": 0.0213, "step": 7700 }, { "epoch": 0.6466058424709132, "grad_norm": 0.7647340893745422, "learning_rate": 1.1110719717517102e-05, "loss": 0.0467, "step": 7725 }, { "epoch": 0.6486984180128903, "grad_norm": 15.440308570861816, "learning_rate": 1.0993132944034206e-05, "loss": 0.0436, "step": 7750 }, { "epoch": 0.6507909935548674, "grad_norm": 16.530546188354492, "learning_rate": 1.087593542557273e-05, "loss": 0.0268, "step": 7775 }, { "epoch": 0.6528835690968444, "grad_norm": 0.1774834841489792, "learning_rate": 1.0759132227126133e-05, "loss": 0.0293, "step": 7800 }, { "epoch": 0.6549761446388215, "grad_norm": 0.04517792537808418, "learning_rate": 1.064272839664628e-05, "loss": 0.0616, "step": 7825 }, { "epoch": 0.6570687201807985, "grad_norm": 1.20844566822052, "learning_rate": 1.0526728964825357e-05, "loss": 0.0482, "step": 7850 }, { "epoch": 0.6591612957227756, "grad_norm": 0.033426906913518906, "learning_rate": 1.0411138944878371e-05, "loss": 0.0274, "step": 7875 }, { "epoch": 0.6612538712647527, "grad_norm": 0.012136283330619335, "learning_rate": 1.0295963332326587e-05, "loss": 0.0725, "step": 7900 }, { "epoch": 0.6633464468067297, "grad_norm": 0.18010033667087555, "learning_rate": 1.0181207104781539e-05, "loss": 0.0485, "step": 7925 }, { "epoch": 0.6654390223487068, "grad_norm": 0.27462857961654663, "learning_rate": 1.0066875221729995e-05, "loss": 0.0314, "step": 7950 }, { "epoch": 0.6675315978906838, "grad_norm": 7.735607624053955, "learning_rate": 9.952972624319542e-06, "loss": 0.0468, "step": 7975 }, { "epoch": 0.6696241734326609, "grad_norm": 0.2843437194824219, "learning_rate": 9.839504235145117e-06, "loss": 0.0419, "step": 8000 }, { "epoch": 0.671716748974638, "grad_norm": 0.6654235124588013, "learning_rate": 9.726474958036188e-06, "loss": 0.0894, "step": 8025 }, { "epoch": 0.673809324516615, "grad_norm": 6.332837104797363, "learning_rate": 9.613889677844899e-06, "loss": 0.0371, "step": 8050 }, { "epoch": 0.6759019000585921, "grad_norm": 0.2247881293296814, "learning_rate": 9.501753260234885e-06, "loss": 0.0333, "step": 8075 }, { "epoch": 0.6779944756005691, "grad_norm": 46.26710891723633, "learning_rate": 9.39007055147106e-06, "loss": 0.0916, "step": 8100 }, { "epoch": 0.6800870511425462, "grad_norm": 10.392670631408691, "learning_rate": 9.278846378210105e-06, "loss": 0.0376, "step": 8125 }, { "epoch": 0.6821796266845234, "grad_norm": 0.12523211538791656, "learning_rate": 9.168085547291925e-06, "loss": 0.0604, "step": 8150 }, { "epoch": 0.6842722022265004, "grad_norm": 0.7956030964851379, "learning_rate": 9.057792845531896e-06, "loss": 0.0348, "step": 8175 }, { "epoch": 0.6863647777684775, "grad_norm": 0.120228111743927, "learning_rate": 8.947973039513956e-06, "loss": 0.0626, "step": 8200 }, { "epoch": 0.6884573533104545, "grad_norm": 0.10342678427696228, "learning_rate": 8.838630875384664e-06, "loss": 0.0423, "step": 8225 }, { "epoch": 0.6905499288524316, "grad_norm": 0.045878130942583084, "learning_rate": 8.72977107864802e-06, "loss": 0.0484, "step": 8250 }, { "epoch": 0.6926425043944087, "grad_norm": 0.035083040595054626, "learning_rate": 8.6213983539613e-06, "loss": 0.0255, "step": 8275 }, { "epoch": 0.6947350799363857, "grad_norm": 6.96301794052124, "learning_rate": 8.51351738493168e-06, "loss": 0.0561, "step": 8300 }, { "epoch": 0.6968276554783628, "grad_norm": 0.13092511892318726, "learning_rate": 8.406132833913869e-06, "loss": 0.0622, "step": 8325 }, { "epoch": 0.6989202310203398, "grad_norm": 0.3119213879108429, "learning_rate": 8.29924934180856e-06, "loss": 0.0426, "step": 8350 }, { "epoch": 0.7010128065623169, "grad_norm": 0.057294104248285294, "learning_rate": 8.19287152786192e-06, "loss": 0.0652, "step": 8375 }, { "epoch": 0.703105382104294, "grad_norm": 0.6926544308662415, "learning_rate": 8.087003989465905e-06, "loss": 0.0447, "step": 8400 }, { "epoch": 0.705197957646271, "grad_norm": 0.12673336267471313, "learning_rate": 7.981651301959618e-06, "loss": 0.0633, "step": 8425 }, { "epoch": 0.7072905331882481, "grad_norm": 0.28860193490982056, "learning_rate": 7.876818018431526e-06, "loss": 0.0527, "step": 8450 }, { "epoch": 0.7093831087302251, "grad_norm": 7.478037357330322, "learning_rate": 7.77250866952274e-06, "loss": 0.0452, "step": 8475 }, { "epoch": 0.7114756842722022, "grad_norm": 16.230470657348633, "learning_rate": 7.668727763231152e-06, "loss": 0.03, "step": 8500 }, { "epoch": 0.7135682598141793, "grad_norm": 12.577932357788086, "learning_rate": 7.56547978471667e-06, "loss": 0.0294, "step": 8525 }, { "epoch": 0.7156608353561563, "grad_norm": 0.06792646646499634, "learning_rate": 7.462769196107325e-06, "loss": 0.0672, "step": 8550 }, { "epoch": 0.7177534108981334, "grad_norm": 0.8921266794204712, "learning_rate": 7.360600436306487e-06, "loss": 0.0218, "step": 8575 }, { "epoch": 0.7198459864401104, "grad_norm": 0.06442731618881226, "learning_rate": 7.25897792080096e-06, "loss": 0.0086, "step": 8600 }, { "epoch": 0.7219385619820875, "grad_norm": 0.8217195272445679, "learning_rate": 7.157906041470222e-06, "loss": 0.0563, "step": 8625 }, { "epoch": 0.7240311375240647, "grad_norm": 20.53885269165039, "learning_rate": 7.057389166396553e-06, "loss": 0.0598, "step": 8650 }, { "epoch": 0.7261237130660417, "grad_norm": 0.01888943649828434, "learning_rate": 6.957431639676335e-06, "loss": 0.028, "step": 8675 }, { "epoch": 0.7282162886080188, "grad_norm": 5.670931816101074, "learning_rate": 6.858037781232218e-06, "loss": 0.0184, "step": 8700 }, { "epoch": 0.7303088641499959, "grad_norm": 10.210001945495605, "learning_rate": 6.7592118866265136e-06, "loss": 0.065, "step": 8725 }, { "epoch": 0.7324014396919729, "grad_norm": 7.783575057983398, "learning_rate": 6.660958226875474e-06, "loss": 0.0598, "step": 8750 }, { "epoch": 0.73449401523395, "grad_norm": 12.363920211791992, "learning_rate": 6.563281048264772e-06, "loss": 0.0893, "step": 8775 }, { "epoch": 0.736586590775927, "grad_norm": 0.0548861064016819, "learning_rate": 6.466184572165941e-06, "loss": 0.0334, "step": 8800 }, { "epoch": 0.7386791663179041, "grad_norm": 20.708911895751953, "learning_rate": 6.369672994853957e-06, "loss": 0.0836, "step": 8825 }, { "epoch": 0.7407717418598811, "grad_norm": 0.15611207485198975, "learning_rate": 6.273750487325902e-06, "loss": 0.0737, "step": 8850 }, { "epoch": 0.7428643174018582, "grad_norm": 0.1677076667547226, "learning_rate": 6.178421195120661e-06, "loss": 0.0408, "step": 8875 }, { "epoch": 0.7449568929438353, "grad_norm": 0.5552803874015808, "learning_rate": 6.083689238139811e-06, "loss": 0.0557, "step": 8900 }, { "epoch": 0.7470494684858123, "grad_norm": 16.739295959472656, "learning_rate": 5.989558710469521e-06, "loss": 0.049, "step": 8925 }, { "epoch": 0.7491420440277894, "grad_norm": 9.993467330932617, "learning_rate": 5.896033680203663e-06, "loss": 0.0682, "step": 8950 }, { "epoch": 0.7512346195697664, "grad_norm": 3.3099191188812256, "learning_rate": 5.803118189267949e-06, "loss": 0.0681, "step": 8975 }, { "epoch": 0.7533271951117435, "grad_norm": 0.18636733293533325, "learning_rate": 5.710816253245299e-06, "loss": 0.0673, "step": 9000 }, { "epoch": 0.7554197706537206, "grad_norm": 7.4644670486450195, "learning_rate": 5.619131861202254e-06, "loss": 0.0821, "step": 9025 }, { "epoch": 0.7575123461956976, "grad_norm": 0.2912033200263977, "learning_rate": 5.528068975516616e-06, "loss": 0.0413, "step": 9050 }, { "epoch": 0.7596049217376747, "grad_norm": 0.06365963816642761, "learning_rate": 5.437631531706158e-06, "loss": 0.059, "step": 9075 }, { "epoch": 0.7616974972796517, "grad_norm": 0.11069577187299728, "learning_rate": 5.3478234382585995e-06, "loss": 0.0739, "step": 9100 }, { "epoch": 0.7637900728216288, "grad_norm": 10.11336612701416, "learning_rate": 5.258648576462628e-06, "loss": 0.0497, "step": 9125 }, { "epoch": 0.765882648363606, "grad_norm": 4.044532299041748, "learning_rate": 5.170110800240209e-06, "loss": 0.0353, "step": 9150 }, { "epoch": 0.767975223905583, "grad_norm": 1.726006031036377, "learning_rate": 5.082213935980005e-06, "loss": 0.0572, "step": 9175 }, { "epoch": 0.7700677994475601, "grad_norm": 11.652609825134277, "learning_rate": 4.994961782372016e-06, "loss": 0.0697, "step": 9200 }, { "epoch": 0.7721603749895372, "grad_norm": 6.057770252227783, "learning_rate": 4.908358110243394e-06, "loss": 0.0572, "step": 9225 }, { "epoch": 0.7742529505315142, "grad_norm": 0.0476180762052536, "learning_rate": 4.822406662395509e-06, "loss": 0.0812, "step": 9250 }, { "epoch": 0.7763455260734913, "grad_norm": 0.6360920071601868, "learning_rate": 4.737111153442146e-06, "loss": 0.0308, "step": 9275 }, { "epoch": 0.7784381016154683, "grad_norm": 0.23660625517368317, "learning_rate": 4.652475269649028e-06, "loss": 0.0298, "step": 9300 }, { "epoch": 0.7805306771574454, "grad_norm": 1.2915230989456177, "learning_rate": 4.568502668774446e-06, "loss": 0.0447, "step": 9325 }, { "epoch": 0.7826232526994225, "grad_norm": 0.3824012279510498, "learning_rate": 4.4851969799112304e-06, "loss": 0.0556, "step": 9350 }, { "epoch": 0.7847158282413995, "grad_norm": 1.5623775720596313, "learning_rate": 4.402561803329866e-06, "loss": 0.041, "step": 9375 }, { "epoch": 0.7868084037833766, "grad_norm": 4.520791053771973, "learning_rate": 4.320600710322933e-06, "loss": 0.0549, "step": 9400 }, { "epoch": 0.7889009793253536, "grad_norm": 0.22007611393928528, "learning_rate": 4.239317243050733e-06, "loss": 0.0376, "step": 9425 }, { "epoch": 0.7909935548673307, "grad_norm": 0.25925832986831665, "learning_rate": 4.158714914388246e-06, "loss": 0.0681, "step": 9450 }, { "epoch": 0.7930861304093078, "grad_norm": 0.07315188646316528, "learning_rate": 4.078797207773264e-06, "loss": 0.0424, "step": 9475 }, { "epoch": 0.7951787059512848, "grad_norm": 0.12042330950498581, "learning_rate": 3.999567577055874e-06, "loss": 0.0545, "step": 9500 }, { "epoch": 0.7972712814932619, "grad_norm": 0.18977761268615723, "learning_rate": 3.921029446349201e-06, "loss": 0.0643, "step": 9525 }, { "epoch": 0.7993638570352389, "grad_norm": 0.10226922482252121, "learning_rate": 3.843186209881386e-06, "loss": 0.0234, "step": 9550 }, { "epoch": 0.801456432577216, "grad_norm": 0.10693290829658508, "learning_rate": 3.766041231848949e-06, "loss": 0.0228, "step": 9575 }, { "epoch": 0.803549008119193, "grad_norm": 0.027522044256329536, "learning_rate": 3.6895978462713443e-06, "loss": 0.0549, "step": 9600 }, { "epoch": 0.8056415836611701, "grad_norm": 0.1407710462808609, "learning_rate": 3.613859356846916e-06, "loss": 0.0522, "step": 9625 }, { "epoch": 0.8077341592031473, "grad_norm": 0.6555646061897278, "learning_rate": 3.538829036810074e-06, "loss": 0.0292, "step": 9650 }, { "epoch": 0.8098267347451243, "grad_norm": 42.79521179199219, "learning_rate": 3.4645101287898975e-06, "loss": 0.0751, "step": 9675 }, { "epoch": 0.8119193102871014, "grad_norm": 0.2245454490184784, "learning_rate": 3.3909058446699205e-06, "loss": 0.0207, "step": 9700 }, { "epoch": 0.8140118858290785, "grad_norm": 0.31118181347846985, "learning_rate": 3.3180193654493854e-06, "loss": 0.0474, "step": 9725 }, { "epoch": 0.8161044613710555, "grad_norm": 0.10519175231456757, "learning_rate": 3.245853841105724e-06, "loss": 0.0384, "step": 9750 }, { "epoch": 0.8181970369130326, "grad_norm": 0.06231565773487091, "learning_rate": 3.174412390458457e-06, "loss": 0.0373, "step": 9775 }, { "epoch": 0.8202896124550096, "grad_norm": 1.0349115133285522, "learning_rate": 3.1036981010343737e-06, "loss": 0.0367, "step": 9800 }, { "epoch": 0.8223821879969867, "grad_norm": 0.1348392218351364, "learning_rate": 3.0337140289341337e-06, "loss": 0.0506, "step": 9825 }, { "epoch": 0.8244747635389638, "grad_norm": 0.06425004452466965, "learning_rate": 2.9644631987001425e-06, "loss": 0.0427, "step": 9850 }, { "epoch": 0.8265673390809408, "grad_norm": 4.405979633331299, "learning_rate": 2.895948603185894e-06, "loss": 0.0655, "step": 9875 }, { "epoch": 0.8286599146229179, "grad_norm": 1.3921059370040894, "learning_rate": 2.828173203426572e-06, "loss": 0.1037, "step": 9900 }, { "epoch": 0.8307524901648949, "grad_norm": 7.713419437408447, "learning_rate": 2.7611399285111252e-06, "loss": 0.0571, "step": 9925 }, { "epoch": 0.832845065706872, "grad_norm": 0.28889116644859314, "learning_rate": 2.6948516754556452e-06, "loss": 0.0992, "step": 9950 }, { "epoch": 0.8349376412488491, "grad_norm": 0.10869034379720688, "learning_rate": 2.6293113090781953e-06, "loss": 0.0404, "step": 9975 }, { "epoch": 0.8370302167908261, "grad_norm": 6.920375347137451, "learning_rate": 2.564521661874968e-06, "loss": 0.0712, "step": 10000 }, { "epoch": 0.8391227923328032, "grad_norm": 0.0789741575717926, "learning_rate": 2.5004855338979074e-06, "loss": 0.0727, "step": 10025 }, { "epoch": 0.8412153678747802, "grad_norm": 0.7709121108055115, "learning_rate": 2.437205692633653e-06, "loss": 0.0676, "step": 10050 }, { "epoch": 0.8433079434167573, "grad_norm": 0.08355044573545456, "learning_rate": 2.374684872883988e-06, "loss": 0.0442, "step": 10075 }, { "epoch": 0.8454005189587344, "grad_norm": 7.519137859344482, "learning_rate": 2.3129257766475967e-06, "loss": 0.0689, "step": 10100 }, { "epoch": 0.8474930945007115, "grad_norm": 0.05053620785474777, "learning_rate": 2.2519310730033305e-06, "loss": 0.039, "step": 10125 }, { "epoch": 0.8495856700426886, "grad_norm": 0.4234563708305359, "learning_rate": 2.1917033979948377e-06, "loss": 0.042, "step": 10150 }, { "epoch": 0.8516782455846656, "grad_norm": 0.931300163269043, "learning_rate": 2.1322453545166377e-06, "loss": 0.0359, "step": 10175 }, { "epoch": 0.8537708211266427, "grad_norm": 0.06214135140180588, "learning_rate": 2.0735595122016417e-06, "loss": 0.0401, "step": 10200 }, { "epoch": 0.8558633966686198, "grad_norm": 4.354775905609131, "learning_rate": 2.0156484073100845e-06, "loss": 0.1028, "step": 10225 }, { "epoch": 0.8579559722105968, "grad_norm": 0.26386329531669617, "learning_rate": 1.9585145426199358e-06, "loss": 0.0478, "step": 10250 }, { "epoch": 0.8600485477525739, "grad_norm": 0.310926228761673, "learning_rate": 1.9021603873187077e-06, "loss": 0.0589, "step": 10275 }, { "epoch": 0.862141123294551, "grad_norm": 29.527650833129883, "learning_rate": 1.8465883768967718e-06, "loss": 0.0169, "step": 10300 }, { "epoch": 0.864233698836528, "grad_norm": 0.030782422050833702, "learning_rate": 1.7918009130420744e-06, "loss": 0.0451, "step": 10325 }, { "epoch": 0.8663262743785051, "grad_norm": 0.1177581399679184, "learning_rate": 1.7378003635363727e-06, "loss": 0.0532, "step": 10350 }, { "epoch": 0.8684188499204821, "grad_norm": 0.15917450189590454, "learning_rate": 1.684589062152866e-06, "loss": 0.0461, "step": 10375 }, { "epoch": 0.8705114254624592, "grad_norm": 0.17231865227222443, "learning_rate": 1.6321693085553803e-06, "loss": 0.0732, "step": 10400 }, { "epoch": 0.8726040010044362, "grad_norm": 0.4389756917953491, "learning_rate": 1.5805433681989434e-06, "loss": 0.058, "step": 10425 }, { "epoch": 0.8746965765464133, "grad_norm": 0.16644658148288727, "learning_rate": 1.529713472231904e-06, "loss": 0.0241, "step": 10450 }, { "epoch": 0.8767891520883904, "grad_norm": 0.12109541147947311, "learning_rate": 1.4796818173994853e-06, "loss": 0.0538, "step": 10475 }, { "epoch": 0.8788817276303674, "grad_norm": 0.07239028066396713, "learning_rate": 1.4304505659488754e-06, "loss": 0.0274, "step": 10500 }, { "epoch": 0.8809743031723445, "grad_norm": 0.200308695435524, "learning_rate": 1.3820218455357416e-06, "loss": 0.038, "step": 10525 }, { "epoch": 0.8830668787143215, "grad_norm": 18.13840675354004, "learning_rate": 1.3343977491323212e-06, "loss": 0.0625, "step": 10550 }, { "epoch": 0.8851594542562986, "grad_norm": 0.20215554535388947, "learning_rate": 1.2875803349369288e-06, "loss": 0.0481, "step": 10575 }, { "epoch": 0.8872520297982757, "grad_norm": 16.416807174682617, "learning_rate": 1.2415716262850385e-06, "loss": 0.0559, "step": 10600 }, { "epoch": 0.8893446053402528, "grad_norm": 0.40492701530456543, "learning_rate": 1.196373611561814e-06, "loss": 0.0478, "step": 10625 }, { "epoch": 0.8914371808822299, "grad_norm": 0.08658579736948013, "learning_rate": 1.1519882441161933e-06, "loss": 0.0307, "step": 10650 }, { "epoch": 0.893529756424207, "grad_norm": 6.548172473907471, "learning_rate": 1.108417442176468e-06, "loss": 0.0499, "step": 10675 }, { "epoch": 0.895622331966184, "grad_norm": 104.8302001953125, "learning_rate": 1.0656630887673635e-06, "loss": 0.0668, "step": 10700 }, { "epoch": 0.8977149075081611, "grad_norm": 0.05074332281947136, "learning_rate": 1.0237270316286896e-06, "loss": 0.055, "step": 10725 }, { "epoch": 0.8998074830501381, "grad_norm": 0.9664784669876099, "learning_rate": 9.826110831354562e-07, "loss": 0.0458, "step": 10750 }, { "epoch": 0.9019000585921152, "grad_norm": 8.746543884277344, "learning_rate": 9.42317020219572e-07, "loss": 0.0525, "step": 10775 }, { "epoch": 0.9039926341340923, "grad_norm": 8.132631301879883, "learning_rate": 9.028465842930223e-07, "loss": 0.0647, "step": 10800 }, { "epoch": 0.9060852096760693, "grad_norm": 8.856213569641113, "learning_rate": 8.64201481172644e-07, "loss": 0.0679, "step": 10825 }, { "epoch": 0.9081777852180464, "grad_norm": 9.962249755859375, "learning_rate": 8.26383381006366e-07, "loss": 0.0541, "step": 10850 }, { "epoch": 0.9102703607600234, "grad_norm": 0.11603320389986038, "learning_rate": 7.893939182010669e-07, "loss": 0.0484, "step": 10875 }, { "epoch": 0.9123629363020005, "grad_norm": 0.10191322863101959, "learning_rate": 7.532346913519051e-07, "loss": 0.0217, "step": 10900 }, { "epoch": 0.9144555118439776, "grad_norm": 0.037384625524282455, "learning_rate": 7.179072631732608e-07, "loss": 0.0461, "step": 10925 }, { "epoch": 0.9165480873859546, "grad_norm": 4.9774932861328125, "learning_rate": 6.834131604311767e-07, "loss": 0.0635, "step": 10950 }, { "epoch": 0.9186406629279317, "grad_norm": 0.1667776256799698, "learning_rate": 6.497538738773945e-07, "loss": 0.0632, "step": 10975 }, { "epoch": 0.9207332384699087, "grad_norm": 0.22654391825199127, "learning_rate": 6.169308581849076e-07, "loss": 0.0358, "step": 11000 }, { "epoch": 0.9228258140118858, "grad_norm": 0.11612854152917862, "learning_rate": 5.849455318851149e-07, "loss": 0.0542, "step": 11025 }, { "epoch": 0.9249183895538629, "grad_norm": 35.777793884277344, "learning_rate": 5.537992773064949e-07, "loss": 0.0661, "step": 11050 }, { "epoch": 0.9270109650958399, "grad_norm": 31.49355125427246, "learning_rate": 5.234934405148818e-07, "loss": 0.0461, "step": 11075 }, { "epoch": 0.929103540637817, "grad_norm": 0.2999427318572998, "learning_rate": 4.94029331255279e-07, "loss": 0.0336, "step": 11100 }, { "epoch": 0.9311961161797941, "grad_norm": 1.5943710803985596, "learning_rate": 4.65408222895265e-07, "loss": 0.1199, "step": 11125 }, { "epoch": 0.9332886917217712, "grad_norm": 0.04734862968325615, "learning_rate": 4.3763135236996045e-07, "loss": 0.0506, "step": 11150 }, { "epoch": 0.9353812672637483, "grad_norm": 7.017291069030762, "learning_rate": 4.1069992012855664e-07, "loss": 0.0793, "step": 11175 }, { "epoch": 0.9374738428057253, "grad_norm": 0.1956498920917511, "learning_rate": 3.8461509008245637e-07, "loss": 0.0557, "step": 11200 }, { "epoch": 0.9395664183477024, "grad_norm": 14.131322860717773, "learning_rate": 3.5937798955495427e-07, "loss": 0.0493, "step": 11225 }, { "epoch": 0.9416589938896794, "grad_norm": 0.08296482264995575, "learning_rate": 3.349897092325338e-07, "loss": 0.0785, "step": 11250 }, { "epoch": 0.9437515694316565, "grad_norm": 6.673073768615723, "learning_rate": 3.1145130311771134e-07, "loss": 0.0728, "step": 11275 }, { "epoch": 0.9458441449736336, "grad_norm": 0.3945884704589844, "learning_rate": 2.887637884834993e-07, "loss": 0.0453, "step": 11300 }, { "epoch": 0.9479367205156106, "grad_norm": 0.04271361231803894, "learning_rate": 2.6692814582943707e-07, "loss": 0.0378, "step": 11325 }, { "epoch": 0.9500292960575877, "grad_norm": 6.191662788391113, "learning_rate": 2.45945318839218e-07, "loss": 0.0506, "step": 11350 }, { "epoch": 0.9521218715995647, "grad_norm": 0.21662096679210663, "learning_rate": 2.2581621433989785e-07, "loss": 0.057, "step": 11375 }, { "epoch": 0.9542144471415418, "grad_norm": 0.04659692198038101, "learning_rate": 2.0654170226271696e-07, "loss": 0.0632, "step": 11400 }, { "epoch": 0.9563070226835189, "grad_norm": 7.704138278961182, "learning_rate": 1.8812261560549272e-07, "loss": 0.0862, "step": 11425 }, { "epoch": 0.9583995982254959, "grad_norm": 0.09132679551839828, "learning_rate": 1.7055975039662387e-07, "loss": 0.0502, "step": 11450 }, { "epoch": 0.960492173767473, "grad_norm": 12.371918678283691, "learning_rate": 1.5385386566069137e-07, "loss": 0.0534, "step": 11475 }, { "epoch": 0.96258474930945, "grad_norm": 0.09234387427568436, "learning_rate": 1.3800568338565133e-07, "loss": 0.0764, "step": 11500 }, { "epoch": 0.9646773248514271, "grad_norm": 0.1503359079360962, "learning_rate": 1.230158884916266e-07, "loss": 0.0589, "step": 11525 }, { "epoch": 0.9667699003934042, "grad_norm": 7.913686752319336, "learning_rate": 1.088851288013193e-07, "loss": 0.0655, "step": 11550 }, { "epoch": 0.9688624759353812, "grad_norm": 8.503890991210938, "learning_rate": 9.56140150120044e-08, "loss": 0.0523, "step": 11575 }, { "epoch": 0.9709550514773583, "grad_norm": 0.07973815500736237, "learning_rate": 8.320312066913972e-08, "loss": 0.0239, "step": 11600 }, { "epoch": 0.9730476270193354, "grad_norm": 0.23680634796619415, "learning_rate": 7.165298214157457e-08, "loss": 0.0381, "step": 11625 }, { "epoch": 0.9751402025613125, "grad_norm": 0.10261060297489166, "learning_rate": 6.096409859837727e-08, "loss": 0.1002, "step": 11650 }, { "epoch": 0.9772327781032896, "grad_norm": 0.22620265185832977, "learning_rate": 5.113693198725456e-08, "loss": 0.0493, "step": 11675 }, { "epoch": 0.9793253536452666, "grad_norm": 7.392359256744385, "learning_rate": 4.2171907014596504e-08, "loss": 0.0553, "step": 11700 }, { "epoch": 0.9814179291872437, "grad_norm": 0.09146345406770706, "learning_rate": 3.406941112710449e-08, "loss": 0.0752, "step": 11725 }, { "epoch": 0.9835105047292207, "grad_norm": 0.03258276730775833, "learning_rate": 2.6829794495066842e-08, "loss": 0.0422, "step": 11750 }, { "epoch": 0.9856030802711978, "grad_norm": 15.740374565124512, "learning_rate": 2.0453369997215366e-08, "loss": 0.0587, "step": 11775 }, { "epoch": 0.9876956558131749, "grad_norm": 0.10255517065525055, "learning_rate": 1.4940413207198413e-08, "loss": 0.0378, "step": 11800 }, { "epoch": 0.9897882313551519, "grad_norm": 28.702392578125, "learning_rate": 1.0291162381685927e-08, "loss": 0.043, "step": 11825 }, { "epoch": 0.991880806897129, "grad_norm": 0.1069909930229187, "learning_rate": 6.5058184500577104e-09, "loss": 0.0577, "step": 11850 }, { "epoch": 0.993973382439106, "grad_norm": 62.75130844116211, "learning_rate": 3.5845450057303554e-09, "loss": 0.0672, "step": 11875 }, { "epoch": 0.9960659579810831, "grad_norm": 1.7898138761520386, "learning_rate": 1.5274682990762402e-09, "loss": 0.0607, "step": 11900 }, { "epoch": 0.9981585335230602, "grad_norm": 0.3523304760456085, "learning_rate": 3.346772319834379e-10, "loss": 0.0546, "step": 11925 }, { "epoch": 1.0, "eval_accuracy": 0.9794339738473816, "eval_auc": 0.9957081876919603, "eval_f1": 0.9853826686447335, "eval_loss": 0.09541802108287811, "eval_precision": 0.9808886765408504, "eval_recall": 0.9899180291938807, "eval_runtime": 3282.7457, "eval_samples_per_second": 9.924, "eval_steps_per_second": 0.207, "step": 11947 } ], "logging_steps": 25, "max_steps": 11947, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.602150538303662e+19, "train_batch_size": 24, "trial_name": null, "trial_params": null }