{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 2150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.999863099129187e-06, "loss": 1.1314, "step": 10 }, { "epoch": 0.02, "learning_rate": 3.999306971462393e-06, "loss": 0.5172, "step": 20 }, { "epoch": 0.03, "learning_rate": 3.998323179562853e-06, "loss": 0.3774, "step": 30 }, { "epoch": 0.04, "learning_rate": 3.996911933869793e-06, "loss": 0.3489, "step": 40 }, { "epoch": 0.05, "learning_rate": 3.995073536257471e-06, "loss": 0.3278, "step": 50 }, { "epoch": 0.06, "learning_rate": 3.992808379970601e-06, "loss": 0.3373, "step": 60 }, { "epoch": 0.07, "learning_rate": 3.990116949540244e-06, "loss": 0.3649, "step": 70 }, { "epoch": 0.07, "learning_rate": 3.986999820680159e-06, "loss": 0.3112, "step": 80 }, { "epoch": 0.08, "learning_rate": 3.983457660163652e-06, "loss": 0.3647, "step": 90 }, { "epoch": 0.09, "learning_rate": 3.979491225680954e-06, "loss": 0.3191, "step": 100 }, { "epoch": 0.1, "learning_rate": 3.975101365677144e-06, "loss": 0.3062, "step": 110 }, { "epoch": 0.11, "learning_rate": 3.970289019170659e-06, "loss": 0.356, "step": 120 }, { "epoch": 0.12, "learning_rate": 3.965055215552438e-06, "loss": 0.2979, "step": 130 }, { "epoch": 0.13, "learning_rate": 3.95940107436572e-06, "loss": 0.278, "step": 140 }, { "epoch": 0.14, "learning_rate": 3.9533278050665775e-06, "loss": 0.3105, "step": 150 }, { "epoch": 0.15, "learning_rate": 3.946836706765197e-06, "loss": 0.2607, "step": 160 }, { "epoch": 0.16, "learning_rate": 3.9399291679479965e-06, "loss": 0.2657, "step": 170 }, { "epoch": 0.17, "learning_rate": 3.9326066661806184e-06, "loss": 0.2739, "step": 180 }, { "epoch": 0.18, "learning_rate": 3.924870767791867e-06, "loss": 0.2459, "step": 190 }, { "epoch": 0.19, "learning_rate": 3.916723127538665e-06, "loss": 0.2262, "step": 200 }, { "epoch": 0.2, "learning_rate": 3.908165488252087e-06, "loss": 0.2293, "step": 210 }, { "epoch": 0.2, "learning_rate": 3.899199680464556e-06, "loss": 0.3081, "step": 220 }, { "epoch": 0.21, "learning_rate": 3.889827622018288e-06, "loss": 0.2413, "step": 230 }, { "epoch": 0.22, "learning_rate": 3.880051317655047e-06, "loss": 0.3194, "step": 240 }, { "epoch": 0.23, "learning_rate": 3.8698728585873195e-06, "loss": 0.3346, "step": 250 }, { "epoch": 0.24, "learning_rate": 3.859294422050993e-06, "loss": 0.2913, "step": 260 }, { "epoch": 0.25, "learning_rate": 3.84831827083963e-06, "loss": 0.2494, "step": 270 }, { "epoch": 0.26, "learning_rate": 3.8369467528204436e-06, "loss": 0.2894, "step": 280 }, { "epoch": 0.27, "learning_rate": 3.825182300432072e-06, "loss": 0.2864, "step": 290 }, { "epoch": 0.28, "learning_rate": 3.813027430164269e-06, "loss": 0.2615, "step": 300 }, { "epoch": 0.29, "learning_rate": 3.800484742019604e-06, "loss": 0.2734, "step": 310 }, { "epoch": 0.3, "learning_rate": 3.787556918957312e-06, "loss": 0.2794, "step": 320 }, { "epoch": 0.31, "learning_rate": 3.77424672631939e-06, "loss": 0.2784, "step": 330 }, { "epoch": 0.32, "learning_rate": 3.7605570112390705e-06, "loss": 0.2508, "step": 340 }, { "epoch": 0.33, "learning_rate": 3.746490702031805e-06, "loss": 0.2763, "step": 350 }, { "epoch": 0.33, "learning_rate": 3.732050807568877e-06, "loss": 0.2565, "step": 360 }, { "epoch": 0.34, "learning_rate": 3.717240416633789e-06, "loss": 0.2558, "step": 370 }, { "epoch": 0.35, "learning_rate": 3.702062697261546e-06, "loss": 0.2347, "step": 380 }, { "epoch": 0.36, "learning_rate": 3.686520896061002e-06, "loss": 0.3922, "step": 390 }, { "epoch": 0.37, "learning_rate": 3.67061833752038e-06, "loss": 0.2458, "step": 400 }, { "epoch": 0.38, "learning_rate": 3.654358423296153e-06, "loss": 0.2167, "step": 410 }, { "epoch": 0.39, "learning_rate": 3.6377446314854004e-06, "loss": 0.2613, "step": 420 }, { "epoch": 0.4, "learning_rate": 3.620780515881827e-06, "loss": 0.3321, "step": 430 }, { "epoch": 0.41, "learning_rate": 3.6034697052155813e-06, "loss": 0.2842, "step": 440 }, { "epoch": 0.42, "learning_rate": 3.5858159023770486e-06, "loss": 0.3236, "step": 450 }, { "epoch": 0.43, "learning_rate": 3.5678228836247796e-06, "loss": 0.2987, "step": 460 }, { "epoch": 0.44, "learning_rate": 3.549494497777724e-06, "loss": 0.2398, "step": 470 }, { "epoch": 0.45, "learning_rate": 3.5308346653919463e-06, "loss": 0.2345, "step": 480 }, { "epoch": 0.46, "learning_rate": 3.511847377921993e-06, "loss": 0.234, "step": 490 }, { "epoch": 0.47, "learning_rate": 3.4925366968670946e-06, "loss": 0.2587, "step": 500 }, { "epoch": 0.47, "learning_rate": 3.4729067529023884e-06, "loss": 0.2433, "step": 510 }, { "epoch": 0.48, "learning_rate": 3.4529617449953378e-06, "loss": 0.2265, "step": 520 }, { "epoch": 0.49, "learning_rate": 3.4327059395075504e-06, "loss": 0.2279, "step": 530 }, { "epoch": 0.5, "learning_rate": 3.412143669282175e-06, "loss": 0.2195, "step": 540 }, { "epoch": 0.51, "learning_rate": 3.391279332717078e-06, "loss": 0.2684, "step": 550 }, { "epoch": 0.52, "learning_rate": 3.3701173928240006e-06, "loss": 0.2246, "step": 560 }, { "epoch": 0.53, "learning_rate": 3.348662376273894e-06, "loss": 0.2145, "step": 570 }, { "epoch": 0.54, "learning_rate": 3.326918872428633e-06, "loss": 0.2368, "step": 580 }, { "epoch": 0.55, "learning_rate": 3.3048915323593246e-06, "loss": 0.28, "step": 590 }, { "epoch": 0.56, "learning_rate": 3.282585067851414e-06, "loss": 0.2498, "step": 600 }, { "epoch": 0.57, "learning_rate": 3.2600042503968075e-06, "loss": 0.2301, "step": 610 }, { "epoch": 0.58, "learning_rate": 3.2371539101732144e-06, "loss": 0.2357, "step": 620 }, { "epoch": 0.59, "learning_rate": 3.214038935010949e-06, "loss": 0.2503, "step": 630 }, { "epoch": 0.6, "learning_rate": 3.190664269347387e-06, "loss": 0.2275, "step": 640 }, { "epoch": 0.6, "learning_rate": 3.1670349131693243e-06, "loss": 0.2249, "step": 650 }, { "epoch": 0.61, "learning_rate": 3.1431559209434456e-06, "loss": 0.2467, "step": 660 }, { "epoch": 0.62, "learning_rate": 3.1190324005351435e-06, "loss": 0.222, "step": 670 }, { "epoch": 0.63, "learning_rate": 3.0946695121159145e-06, "loss": 0.2305, "step": 680 }, { "epoch": 0.64, "learning_rate": 3.0700724670595657e-06, "loss": 0.2312, "step": 690 }, { "epoch": 0.65, "learning_rate": 3.045246526827469e-06, "loss": 0.2277, "step": 700 }, { "epoch": 0.66, "learning_rate": 3.020197001843103e-06, "loss": 0.2352, "step": 710 }, { "epoch": 0.67, "learning_rate": 2.9949292503561197e-06, "loss": 0.2291, "step": 720 }, { "epoch": 0.68, "learning_rate": 2.9694486772961792e-06, "loss": 0.2214, "step": 730 }, { "epoch": 0.69, "learning_rate": 2.9437607331168074e-06, "loss": 0.2357, "step": 740 }, { "epoch": 0.7, "learning_rate": 2.9178709126295017e-06, "loss": 0.2421, "step": 750 }, { "epoch": 0.71, "learning_rate": 2.8917847538283657e-06, "loss": 0.226, "step": 760 }, { "epoch": 0.72, "learning_rate": 2.8655078367054886e-06, "loss": 0.2302, "step": 770 }, { "epoch": 0.73, "learning_rate": 2.839045782057358e-06, "loss": 0.2347, "step": 780 }, { "epoch": 0.73, "learning_rate": 2.8124042502825324e-06, "loss": 0.2153, "step": 790 }, { "epoch": 0.74, "learning_rate": 2.7855889401708446e-06, "loss": 0.2327, "step": 800 }, { "epoch": 0.75, "learning_rate": 2.7586055876844007e-06, "loss": 0.2294, "step": 810 }, { "epoch": 0.76, "learning_rate": 2.731459964730618e-06, "loss": 0.2214, "step": 820 }, { "epoch": 0.77, "learning_rate": 2.7041578779275793e-06, "loss": 0.2056, "step": 830 }, { "epoch": 0.78, "learning_rate": 2.6767051673619604e-06, "loss": 0.2192, "step": 840 }, { "epoch": 0.79, "learning_rate": 2.6491077053397988e-06, "loss": 0.233, "step": 850 }, { "epoch": 0.8, "learning_rate": 2.621371395130372e-06, "loss": 0.2462, "step": 860 }, { "epoch": 0.81, "learning_rate": 2.593502169703453e-06, "loss": 0.2045, "step": 870 }, { "epoch": 0.82, "learning_rate": 2.5655059904602063e-06, "loss": 0.2243, "step": 880 }, { "epoch": 0.83, "learning_rate": 2.5373888459580084e-06, "loss": 0.2346, "step": 890 }, { "epoch": 0.84, "learning_rate": 2.5091567506294633e-06, "loss": 0.2278, "step": 900 }, { "epoch": 0.85, "learning_rate": 2.4808157434958722e-06, "loss": 0.2302, "step": 910 }, { "epoch": 0.86, "learning_rate": 2.4523718868754487e-06, "loss": 0.2115, "step": 920 }, { "epoch": 0.87, "learning_rate": 2.423831265086556e-06, "loss": 0.2168, "step": 930 }, { "epoch": 0.87, "learning_rate": 2.3951999831462292e-06, "loss": 0.2111, "step": 940 }, { "epoch": 0.88, "learning_rate": 2.3664841654642746e-06, "loss": 0.2411, "step": 950 }, { "epoch": 0.89, "learning_rate": 2.3376899545332223e-06, "loss": 0.2249, "step": 960 }, { "epoch": 0.9, "learning_rate": 2.308823509614408e-06, "loss": 0.225, "step": 970 }, { "epoch": 0.91, "learning_rate": 2.2798910054204653e-06, "loss": 0.2216, "step": 980 }, { "epoch": 0.92, "learning_rate": 2.250898630794521e-06, "loss": 0.2102, "step": 990 }, { "epoch": 0.93, "learning_rate": 2.22185258738636e-06, "loss": 0.2094, "step": 1000 }, { "epoch": 0.94, "learning_rate": 2.1927590883258547e-06, "loss": 0.2102, "step": 1010 }, { "epoch": 0.95, "learning_rate": 2.1636243568939384e-06, "loss": 0.2128, "step": 1020 }, { "epoch": 0.96, "learning_rate": 2.1344546251914033e-06, "loss": 0.2162, "step": 1030 }, { "epoch": 0.97, "learning_rate": 2.1052561328058207e-06, "loss": 0.2233, "step": 1040 }, { "epoch": 0.98, "learning_rate": 2.0760351254768503e-06, "loss": 0.2147, "step": 1050 }, { "epoch": 0.99, "learning_rate": 2.046797853760238e-06, "loss": 0.226, "step": 1060 }, { "epoch": 1.0, "learning_rate": 2.017550571690784e-06, "loss": 0.2255, "step": 1070 }, { "epoch": 1.0, "learning_rate": 1.9882995354445663e-06, "loss": 0.2309, "step": 1080 }, { "epoch": 1.01, "learning_rate": 1.959051002000704e-06, "loss": 0.2087, "step": 1090 }, { "epoch": 1.02, "learning_rate": 1.9298112278029507e-06, "loss": 0.2276, "step": 1100 }, { "epoch": 1.03, "learning_rate": 1.9005864674214033e-06, "loss": 0.2034, "step": 1110 }, { "epoch": 1.04, "learning_rate": 1.8713829722146094e-06, "loss": 0.2198, "step": 1120 }, { "epoch": 1.05, "learning_rate": 1.8422069889923646e-06, "loss": 0.2129, "step": 1130 }, { "epoch": 1.06, "learning_rate": 1.8130647586794766e-06, "loss": 0.2184, "step": 1140 }, { "epoch": 1.07, "learning_rate": 1.7839625149807963e-06, "loss": 0.2092, "step": 1150 }, { "epoch": 1.08, "learning_rate": 1.7549064830477907e-06, "loss": 0.2138, "step": 1160 }, { "epoch": 1.09, "learning_rate": 1.7259028781469385e-06, "loss": 0.2156, "step": 1170 }, { "epoch": 1.1, "learning_rate": 1.696957904330251e-06, "loss": 0.208, "step": 1180 }, { "epoch": 1.11, "learning_rate": 1.6680777531081844e-06, "loss": 0.2033, "step": 1190 }, { "epoch": 1.12, "learning_rate": 1.6392686021252372e-06, "loss": 0.2117, "step": 1200 }, { "epoch": 1.13, "learning_rate": 1.6105366138385156e-06, "loss": 0.2078, "step": 1210 }, { "epoch": 1.13, "learning_rate": 1.5818879341995414e-06, "loss": 0.2084, "step": 1220 }, { "epoch": 1.14, "learning_rate": 1.553328691339598e-06, "loss": 0.2039, "step": 1230 }, { "epoch": 1.15, "learning_rate": 1.524864994258884e-06, "loss": 0.2106, "step": 1240 }, { "epoch": 1.16, "learning_rate": 1.49650293151976e-06, "loss": 0.2153, "step": 1250 }, { "epoch": 1.17, "learning_rate": 1.4682485699443666e-06, "loss": 0.2027, "step": 1260 }, { "epoch": 1.18, "learning_rate": 1.4401079533168934e-06, "loss": 0.2031, "step": 1270 }, { "epoch": 1.19, "learning_rate": 1.4120871010907762e-06, "loss": 0.2194, "step": 1280 }, { "epoch": 1.2, "learning_rate": 1.3841920071010992e-06, "loss": 0.2286, "step": 1290 }, { "epoch": 1.21, "learning_rate": 1.3564286382824736e-06, "loss": 0.2033, "step": 1300 }, { "epoch": 1.22, "learning_rate": 1.3288029333926765e-06, "loss": 0.2149, "step": 1310 }, { "epoch": 1.23, "learning_rate": 1.3013208017423116e-06, "loss": 0.2059, "step": 1320 }, { "epoch": 1.24, "learning_rate": 1.2739881219307687e-06, "loss": 0.2053, "step": 1330 }, { "epoch": 1.25, "learning_rate": 1.2468107405887619e-06, "loss": 0.2042, "step": 1340 }, { "epoch": 1.26, "learning_rate": 1.2197944711276895e-06, "loss": 0.202, "step": 1350 }, { "epoch": 1.27, "learning_rate": 1.1929450924961137e-06, "loss": 0.2028, "step": 1360 }, { "epoch": 1.27, "learning_rate": 1.1662683479436108e-06, "loss": 0.2033, "step": 1370 }, { "epoch": 1.28, "learning_rate": 1.1397699437922481e-06, "loss": 0.2078, "step": 1380 }, { "epoch": 1.29, "learning_rate": 1.113455548215967e-06, "loss": 0.2121, "step": 1390 }, { "epoch": 1.3, "learning_rate": 1.0873307900281313e-06, "loss": 0.2022, "step": 1400 }, { "epoch": 1.31, "learning_rate": 1.0614012574774804e-06, "loss": 0.202, "step": 1410 }, { "epoch": 1.32, "learning_rate": 1.0356724970527733e-06, "loss": 0.2127, "step": 1420 }, { "epoch": 1.33, "learning_rate": 1.0101500122963597e-06, "loss": 0.2242, "step": 1430 }, { "epoch": 1.34, "learning_rate": 9.848392626269371e-07, "loss": 0.2067, "step": 1440 }, { "epoch": 1.35, "learning_rate": 9.597456621717469e-07, "loss": 0.2021, "step": 1450 }, { "epoch": 1.36, "learning_rate": 9.34874578608458e-07, "loss": 0.2099, "step": 1460 }, { "epoch": 1.37, "learning_rate": 9.102313320169883e-07, "loss": 0.2022, "step": 1470 }, { "epoch": 1.38, "learning_rate": 8.858211937415074e-07, "loss": 0.2077, "step": 1480 }, { "epoch": 1.39, "learning_rate": 8.616493852628621e-07, "loss": 0.2117, "step": 1490 }, { "epoch": 1.4, "learning_rate": 8.377210770816665e-07, "loss": 0.2213, "step": 1500 }, { "epoch": 1.4, "learning_rate": 8.140413876123054e-07, "loss": 0.2151, "step": 1510 }, { "epoch": 1.41, "learning_rate": 7.906153820880672e-07, "loss": 0.2237, "step": 1520 }, { "epoch": 1.42, "learning_rate": 7.674480714776644e-07, "loss": 0.2219, "step": 1530 }, { "epoch": 1.43, "learning_rate": 7.445444114133532e-07, "loss": 0.2137, "step": 1540 }, { "epoch": 1.44, "learning_rate": 7.219093011308917e-07, "loss": 0.2024, "step": 1550 }, { "epoch": 1.45, "learning_rate": 6.995475824215667e-07, "loss": 0.2166, "step": 1560 }, { "epoch": 1.46, "learning_rate": 6.774640385965009e-07, "loss": 0.2072, "step": 1570 }, { "epoch": 1.47, "learning_rate": 6.556633934634725e-07, "loss": 0.2074, "step": 1580 }, { "epoch": 1.48, "learning_rate": 6.341503103164694e-07, "loss": 0.2041, "step": 1590 }, { "epoch": 1.49, "learning_rate": 6.129293909381725e-07, "loss": 0.2039, "step": 1600 }, { "epoch": 1.5, "learning_rate": 5.920051746156112e-07, "loss": 0.2055, "step": 1610 }, { "epoch": 1.51, "learning_rate": 5.713821371691816e-07, "loss": 0.2295, "step": 1620 }, { "epoch": 1.52, "learning_rate": 5.510646899952378e-07, "loss": 0.202, "step": 1630 }, { "epoch": 1.53, "learning_rate": 5.310571791224696e-07, "loss": 0.2021, "step": 1640 }, { "epoch": 1.53, "learning_rate": 5.113638842822592e-07, "loss": 0.2122, "step": 1650 }, { "epoch": 1.54, "learning_rate": 4.919890179932172e-07, "loss": 0.2132, "step": 1660 }, { "epoch": 1.55, "learning_rate": 4.729367246601026e-07, "loss": 0.2019, "step": 1670 }, { "epoch": 1.56, "learning_rate": 4.5421107968730645e-07, "loss": 0.2081, "step": 1680 }, { "epoch": 1.57, "learning_rate": 4.358160886070965e-07, "loss": 0.2046, "step": 1690 }, { "epoch": 1.58, "learning_rate": 4.177556862228135e-07, "loss": 0.2068, "step": 1700 }, { "epoch": 1.59, "learning_rate": 4.000337357671859e-07, "loss": 0.2239, "step": 1710 }, { "epoch": 1.6, "learning_rate": 3.8265402807596515e-07, "loss": 0.2044, "step": 1720 }, { "epoch": 1.61, "learning_rate": 3.656202807770399e-07, "loss": 0.2019, "step": 1730 }, { "epoch": 1.62, "learning_rate": 3.489361374952113e-07, "loss": 0.2033, "step": 1740 }, { "epoch": 1.63, "learning_rate": 3.326051670728043e-07, "loss": 0.206, "step": 1750 }, { "epoch": 1.64, "learning_rate": 3.166308628062626e-07, "loss": 0.2181, "step": 1760 }, { "epoch": 1.65, "learning_rate": 3.0101664169891507e-07, "loss": 0.2072, "step": 1770 }, { "epoch": 1.66, "learning_rate": 2.8576584373005807e-07, "loss": 0.2048, "step": 1780 }, { "epoch": 1.67, "learning_rate": 2.7088173114050983e-07, "loss": 0.2211, "step": 1790 }, { "epoch": 1.67, "learning_rate": 2.5636748773479965e-07, "loss": 0.2159, "step": 1800 }, { "epoch": 1.68, "learning_rate": 2.4222621820013156e-07, "loss": 0.2094, "step": 1810 }, { "epoch": 1.69, "learning_rate": 2.284609474422703e-07, "loss": 0.2151, "step": 1820 }, { "epoch": 1.7, "learning_rate": 2.1507461993849875e-07, "loss": 0.2272, "step": 1830 }, { "epoch": 1.71, "learning_rate": 2.0207009910777284e-07, "loss": 0.2034, "step": 1840 }, { "epoch": 1.72, "learning_rate": 1.8945016669821846e-07, "loss": 0.2025, "step": 1850 }, { "epoch": 1.73, "learning_rate": 1.7721752219210218e-07, "loss": 0.2102, "step": 1860 }, { "epoch": 1.74, "learning_rate": 1.653747822283893e-07, "loss": 0.2024, "step": 1870 }, { "epoch": 1.75, "learning_rate": 1.539244800430324e-07, "loss": 0.203, "step": 1880 }, { "epoch": 1.76, "learning_rate": 1.4286906492709538e-07, "loss": 0.2153, "step": 1890 }, { "epoch": 1.77, "learning_rate": 1.3221090170283388e-07, "loss": 0.2119, "step": 1900 }, { "epoch": 1.78, "learning_rate": 1.2195227021784727e-07, "loss": 0.2019, "step": 1910 }, { "epoch": 1.79, "learning_rate": 1.1209536485740278e-07, "loss": 0.2044, "step": 1920 }, { "epoch": 1.8, "learning_rate": 1.0264229407504333e-07, "loss": 0.2053, "step": 1930 }, { "epoch": 1.8, "learning_rate": 9.359507994157767e-08, "loss": 0.2033, "step": 1940 }, { "epoch": 1.81, "learning_rate": 8.495565771254387e-08, "loss": 0.2033, "step": 1950 }, { "epoch": 1.82, "learning_rate": 7.672587541424946e-08, "loss": 0.2091, "step": 1960 }, { "epoch": 1.83, "learning_rate": 6.890749344846502e-08, "loss": 0.2165, "step": 1970 }, { "epoch": 1.84, "learning_rate": 6.150218421586362e-08, "loss": 0.2044, "step": 1980 }, { "epoch": 1.85, "learning_rate": 5.451153175828338e-08, "loss": 0.2066, "step": 1990 }, { "epoch": 1.86, "learning_rate": 4.79370314198928e-08, "loss": 0.2045, "step": 2000 }, { "epoch": 1.87, "learning_rate": 4.178008952732503e-08, "loss": 0.2096, "step": 2010 }, { "epoch": 1.88, "learning_rate": 3.6042023088858286e-08, "loss": 0.2018, "step": 2020 }, { "epoch": 1.89, "learning_rate": 3.072405951269674e-08, "loss": 0.2112, "step": 2030 }, { "epoch": 1.9, "learning_rate": 2.5827336344422802e-08, "loss": 0.2022, "step": 2040 }, { "epoch": 1.91, "learning_rate": 2.1352901023667713e-08, "loss": 0.2141, "step": 2050 }, { "epoch": 1.92, "learning_rate": 1.7301710660057255e-08, "loss": 0.2043, "step": 2060 }, { "epoch": 1.93, "learning_rate": 1.3674631828481941e-08, "loss": 0.2031, "step": 2070 }, { "epoch": 1.93, "learning_rate": 1.0472440383729075e-08, "loss": 0.2022, "step": 2080 }, { "epoch": 1.94, "learning_rate": 7.695821294523508e-09, "loss": 0.2016, "step": 2090 }, { "epoch": 1.95, "learning_rate": 5.345368497009061e-09, "loss": 0.2094, "step": 2100 }, { "epoch": 1.96, "learning_rate": 3.4215847677010467e-09, "loss": 0.202, "step": 2110 }, { "epoch": 1.97, "learning_rate": 1.924881615939622e-09, "loss": 0.2016, "step": 2120 }, { "epoch": 1.98, "learning_rate": 8.555791958653191e-10, "loss": 0.2032, "step": 2130 }, { "epoch": 1.99, "learning_rate": 2.1390623793604568e-10, "loss": 0.2023, "step": 2140 }, { "epoch": 2.0, "learning_rate": 0.0, "loss": 0.2094, "step": 2150 }, { "epoch": 2.0, "step": 2150, "total_flos": 2447020503597056.0, "train_loss": 0.23743513484333836, "train_runtime": 83338.4902, "train_samples_per_second": 0.103, "train_steps_per_second": 0.026 } ], "logging_steps": 10, "max_steps": 2150, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 300, "total_flos": 2447020503597056.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }