|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9992821249102656,
  "eval_steps": 500,
  "global_step": 1044,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.000957166786312515,
      "grad_norm": 3.9142152723532337,
      "learning_rate": 1.9047619047619051e-06,
      "loss": 1.3978,
      "step": 1
    },
    {
      "epoch": 0.004785833931562575,
      "grad_norm": 1.3430217420733046,
      "learning_rate": 9.523809523809523e-06,
      "loss": 1.3489,
      "step": 5
    },
    {
      "epoch": 0.00957166786312515,
      "grad_norm": 0.5850408636793494,
      "learning_rate": 1.9047619047619046e-05,
      "loss": 1.2871,
      "step": 10
    },
    {
      "epoch": 0.014357501794687724,
      "grad_norm": 0.46666716038967326,
      "learning_rate": 2.857142857142857e-05,
      "loss": 1.2106,
      "step": 15
    },
    {
      "epoch": 0.0191433357262503,
      "grad_norm": 0.35044064248530404,
      "learning_rate": 3.809523809523809e-05,
      "loss": 1.189,
      "step": 20
    },
    {
      "epoch": 0.023929169657812874,
      "grad_norm": 0.27361957875198517,
      "learning_rate": 4.761904761904762e-05,
      "loss": 1.1469,
      "step": 25
    },
    {
      "epoch": 0.028715003589375447,
      "grad_norm": 0.2368453005937937,
      "learning_rate": 5.714285714285714e-05,
      "loss": 1.158,
      "step": 30
    },
    {
      "epoch": 0.03350083752093802,
      "grad_norm": 0.2277332385016794,
      "learning_rate": 6.666666666666667e-05,
      "loss": 1.1437,
      "step": 35
    },
    {
      "epoch": 0.0382866714525006,
      "grad_norm": 0.2265653549311157,
      "learning_rate": 7.619047619047618e-05,
      "loss": 1.1302,
      "step": 40
    },
    {
      "epoch": 0.043072505384063174,
      "grad_norm": 0.22079711284915807,
      "learning_rate": 8.571428571428571e-05,
      "loss": 1.13,
      "step": 45
    },
    {
      "epoch": 0.04785833931562575,
      "grad_norm": 0.20813516832540208,
      "learning_rate": 9.523809523809524e-05,
      "loss": 1.106,
      "step": 50
    },
    {
      "epoch": 0.05264417324718832,
      "grad_norm": 0.2044131638757028,
      "learning_rate": 0.00010476190476190477,
      "loss": 1.1348,
      "step": 55
    },
    {
      "epoch": 0.057430007178750894,
      "grad_norm": 0.20101729146508107,
      "learning_rate": 0.00011428571428571428,
      "loss": 1.1018,
      "step": 60
    },
    {
      "epoch": 0.062215841110313475,
      "grad_norm": 0.21865369935553125,
      "learning_rate": 0.0001238095238095238,
      "loss": 1.1129,
      "step": 65
    },
    {
      "epoch": 0.06700167504187604,
      "grad_norm": 0.18405578482864565,
      "learning_rate": 0.00013333333333333334,
      "loss": 1.1018,
      "step": 70
    },
    {
      "epoch": 0.07178750897343862,
      "grad_norm": 0.18488079729650672,
      "learning_rate": 0.00014285714285714287,
      "loss": 1.1417,
      "step": 75
    },
    {
      "epoch": 0.0765733429050012,
      "grad_norm": 0.18433481759594844,
      "learning_rate": 0.00015238095238095237,
      "loss": 1.111,
      "step": 80
    },
    {
      "epoch": 0.08135917683656377,
      "grad_norm": 0.20377971597879482,
      "learning_rate": 0.00016190476190476192,
      "loss": 1.0709,
      "step": 85
    },
    {
      "epoch": 0.08614501076812635,
      "grad_norm": 0.20225554382239913,
      "learning_rate": 0.00017142857142857143,
      "loss": 1.1142,
      "step": 90
    },
    {
      "epoch": 0.09093084469968891,
      "grad_norm": 0.18520967333311886,
      "learning_rate": 0.00018095238095238095,
      "loss": 1.1142,
      "step": 95
    },
    {
      "epoch": 0.0957166786312515,
      "grad_norm": 0.19606367225373053,
      "learning_rate": 0.00019047619047619048,
      "loss": 1.1049,
      "step": 100
    },
    {
      "epoch": 0.10050251256281408,
      "grad_norm": 0.1867473714189168,
      "learning_rate": 0.0002,
      "loss": 1.0927,
      "step": 105
    },
    {
      "epoch": 0.10528834649437664,
      "grad_norm": 0.185817071062854,
      "learning_rate": 0.00019998600836567816,
      "loss": 1.1206,
      "step": 110
    },
    {
      "epoch": 0.11007418042593922,
      "grad_norm": 0.1762396939142846,
      "learning_rate": 0.00019994403737802927,
      "loss": 1.1022,
      "step": 115
    },
    {
      "epoch": 0.11486001435750179,
      "grad_norm": 0.16668242539032083,
      "learning_rate": 0.00019987409878190752,
      "loss": 1.1052,
      "step": 120
    },
    {
      "epoch": 0.11964584828906437,
      "grad_norm": 0.1764182848939352,
      "learning_rate": 0.00019977621214841822,
      "loss": 1.1059,
      "step": 125
    },
    {
      "epoch": 0.12443168222062695,
      "grad_norm": 0.1787380695504439,
      "learning_rate": 0.0001996504048694409,
      "loss": 1.1102,
      "step": 130
    },
    {
      "epoch": 0.12921751615218952,
      "grad_norm": 0.18152638872711746,
      "learning_rate": 0.00019949671214996445,
      "loss": 1.0986,
      "step": 135
    },
    {
      "epoch": 0.13400335008375208,
      "grad_norm": 0.1745817045352934,
      "learning_rate": 0.00019931517699823547,
      "loss": 1.085,
      "step": 140
    },
    {
      "epoch": 0.13878918401531468,
      "grad_norm": 0.17554030590687575,
      "learning_rate": 0.0001991058502137231,
      "loss": 1.1363,
      "step": 145
    },
    {
      "epoch": 0.14357501794687724,
      "grad_norm": 0.1800405053641118,
      "learning_rate": 0.00019886879037290384,
      "loss": 1.0924,
      "step": 150
    },
    {
      "epoch": 0.1483608518784398,
      "grad_norm": 0.19148632700424054,
      "learning_rate": 0.0001986040638128698,
      "loss": 1.0824,
      "step": 155
    },
    {
      "epoch": 0.1531466858100024,
      "grad_norm": 0.17313730878343153,
      "learning_rate": 0.0001983117446127654,
      "loss": 1.1071,
      "step": 160
    },
    {
      "epoch": 0.15793251974156497,
      "grad_norm": 0.16961371492797625,
      "learning_rate": 0.00019799191457305768,
      "loss": 1.1311,
      "step": 165
    },
    {
      "epoch": 0.16271835367312754,
      "grad_norm": 0.17327733992734365,
      "learning_rate": 0.00019764466319264595,
      "loss": 1.1133,
      "step": 170
    },
    {
      "epoch": 0.16750418760469013,
      "grad_norm": 0.17176717180251114,
      "learning_rate": 0.00019727008764381675,
      "loss": 1.1153,
      "step": 175
    },
    {
      "epoch": 0.1722900215362527,
      "grad_norm": 0.17138393560502058,
      "learning_rate": 0.0001968682927450523,
      "loss": 1.1006,
      "step": 180
    },
    {
      "epoch": 0.17707585546781526,
      "grad_norm": 0.16465106542109514,
      "learning_rate": 0.00019643939093169844,
      "loss": 1.104,
      "step": 185
    },
    {
      "epoch": 0.18186168939937783,
      "grad_norm": 0.1699157446206454,
      "learning_rate": 0.00019598350222450178,
      "loss": 1.1167,
      "step": 190
    },
    {
      "epoch": 0.18664752333094042,
      "grad_norm": 0.17602023031266878,
      "learning_rate": 0.00019550075419602408,
      "loss": 1.1131,
      "step": 195
    },
    {
      "epoch": 0.191433357262503,
      "grad_norm": 0.18282225583073394,
      "learning_rate": 0.00019499128193494297,
      "loss": 1.0889,
      "step": 200
    },
    {
      "epoch": 0.19621919119406556,
      "grad_norm": 0.16528930064048408,
      "learning_rate": 0.0001944552280082499,
      "loss": 1.1013,
      "step": 205
    },
    {
      "epoch": 0.20100502512562815,
      "grad_norm": 0.16504045631379008,
      "learning_rate": 0.0001938927424213553,
      "loss": 1.1003,
      "step": 210
    },
    {
      "epoch": 0.20579085905719072,
      "grad_norm": 0.16559761424705557,
      "learning_rate": 0.000193303982576112,
      "loss": 1.0998,
      "step": 215
    },
    {
      "epoch": 0.21057669298875328,
      "grad_norm": 0.16791680669282769,
      "learning_rate": 0.0001926891132267692,
      "loss": 1.0919,
      "step": 220
    },
    {
      "epoch": 0.21536252692031588,
      "grad_norm": 0.16689008275055853,
      "learning_rate": 0.00019204830643386868,
      "loss": 1.1069,
      "step": 225
    },
    {
      "epoch": 0.22014836085187844,
      "grad_norm": 0.17305744880787605,
      "learning_rate": 0.00019138174151609683,
      "loss": 1.1272,
      "step": 230
    },
    {
      "epoch": 0.224934194783441,
      "grad_norm": 0.1618964004882894,
      "learning_rate": 0.00019068960500010523,
      "loss": 1.0827,
      "step": 235
    },
    {
      "epoch": 0.22972002871500358,
      "grad_norm": 0.16411871929076272,
      "learning_rate": 0.00018997209056831462,
      "loss": 1.1164,
      "step": 240
    },
    {
      "epoch": 0.23450586264656617,
      "grad_norm": 0.1691454723246668,
      "learning_rate": 0.0001892293990047159,
      "loss": 1.1079,
      "step": 245
    },
    {
      "epoch": 0.23929169657812874,
      "grad_norm": 0.1649265891086064,
      "learning_rate": 0.00018846173813868454,
      "loss": 1.0825,
      "step": 250
    },
    {
      "epoch": 0.2440775305096913,
      "grad_norm": 0.17018378559137046,
      "learning_rate": 0.000187669322786823,
      "loss": 1.1216,
      "step": 255
    },
    {
      "epoch": 0.2488633644412539,
      "grad_norm": 0.1703316481606123,
      "learning_rate": 0.0001868523746928479,
      "loss": 1.0783,
      "step": 260
    },
    {
      "epoch": 0.25364919837281646,
      "grad_norm": 0.16735434292797727,
      "learning_rate": 0.0001860111224655391,
      "loss": 1.1149,
      "step": 265
    },
    {
      "epoch": 0.25843503230437903,
      "grad_norm": 0.1555718882454273,
      "learning_rate": 0.0001851458015147673,
      "loss": 1.1075,
      "step": 270
    },
    {
      "epoch": 0.2632208662359416,
      "grad_norm": 0.16977397286003804,
      "learning_rate": 0.00018425665398561883,
      "loss": 1.0852,
      "step": 275
    },
    {
      "epoch": 0.26800670016750416,
      "grad_norm": 0.1663432558952086,
      "learning_rate": 0.00018334392869063536,
      "loss": 1.0811,
      "step": 280
    },
    {
      "epoch": 0.2727925340990668,
      "grad_norm": 0.1666368907912767,
      "learning_rate": 0.00018240788104018822,
      "loss": 1.1014,
      "step": 285
    },
    {
      "epoch": 0.27757836803062935,
      "grad_norm": 0.15687924477714424,
      "learning_rate": 0.00018144877297100606,
      "loss": 1.0736,
      "step": 290
    },
    {
      "epoch": 0.2823642019621919,
      "grad_norm": 0.16061101558707236,
      "learning_rate": 0.0001804668728728764,
      "loss": 1.0931,
      "step": 295
    },
    {
      "epoch": 0.2871500358937545,
      "grad_norm": 0.160793101364396,
      "learning_rate": 0.00017946245551354157,
      "loss": 1.0999,
      "step": 300
    },
    {
      "epoch": 0.29193586982531705,
      "grad_norm": 0.1633263725476254,
      "learning_rate": 0.00017843580196180952,
      "loss": 1.0948,
      "step": 305
    },
    {
      "epoch": 0.2967217037568796,
      "grad_norm": 0.1631452387362867,
      "learning_rate": 0.00017738719950890168,
      "loss": 1.1013,
      "step": 310
    },
    {
      "epoch": 0.3015075376884422,
      "grad_norm": 0.16210535171429089,
      "learning_rate": 0.00017631694158805946,
      "loss": 1.0798,
      "step": 315
    },
    {
      "epoch": 0.3062933716200048,
      "grad_norm": 0.16229934636841442,
      "learning_rate": 0.000175225327692432,
      "loss": 1.0575,
      "step": 320
    },
    {
      "epoch": 0.3110792055515674,
      "grad_norm": 0.16588195121854765,
      "learning_rate": 0.00017411266329126824,
      "loss": 1.096,
      "step": 325
    },
    {
      "epoch": 0.31586503948312994,
      "grad_norm": 0.158210200244668,
      "learning_rate": 0.00017297925974443673,
      "loss": 1.1071,
      "step": 330
    },
    {
      "epoch": 0.3206508734146925,
      "grad_norm": 0.1663784778299078,
      "learning_rate": 0.00017182543421529676,
      "loss": 1.0739,
      "step": 335
    },
    {
      "epoch": 0.32543670734625507,
      "grad_norm": 0.15384028200238806,
      "learning_rate": 0.00017065150958194586,
      "loss": 1.0848,
      "step": 340
    },
    {
      "epoch": 0.33022254127781764,
      "grad_norm": 0.16073214574887584,
      "learning_rate": 0.00016945781434686783,
      "loss": 1.1157,
      "step": 345
    },
    {
      "epoch": 0.33500837520938026,
      "grad_norm": 0.1745939193140414,
      "learning_rate": 0.00016824468254500704,
      "loss": 1.0815,
      "step": 350
    },
    {
      "epoch": 0.3397942091409428,
      "grad_norm": 0.15802897019970708,
      "learning_rate": 0.0001670124536502947,
      "loss": 1.0779,
      "step": 355
    },
    {
      "epoch": 0.3445800430725054,
      "grad_norm": 0.1579431494377225,
      "learning_rate": 0.00016576147248065267,
      "loss": 1.1031,
      "step": 360
    },
    {
      "epoch": 0.34936587700406796,
      "grad_norm": 0.16455288633589932,
      "learning_rate": 0.00016449208910150232,
      "loss": 1.1207,
      "step": 365
    },
    {
      "epoch": 0.3541517109356305,
      "grad_norm": 0.15512720174775488,
      "learning_rate": 0.00016320465872780477,
      "loss": 1.0843,
      "step": 370
    },
    {
      "epoch": 0.3589375448671931,
      "grad_norm": 0.15810739086397552,
      "learning_rate": 0.00016189954162466012,
      "loss": 1.0674,
      "step": 375
    },
    {
      "epoch": 0.36372337879875566,
      "grad_norm": 0.15539897008538223,
      "learning_rate": 0.0001605771030064934,
      "loss": 1.1075,
      "step": 380
    },
    {
      "epoch": 0.3685092127303183,
      "grad_norm": 0.16059302879871643,
      "learning_rate": 0.00015923771293485585,
      "loss": 1.1083,
      "step": 385
    },
    {
      "epoch": 0.37329504666188085,
      "grad_norm": 0.1726863039386017,
      "learning_rate": 0.00015788174621486934,
      "loss": 1.0839,
      "step": 390
    },
    {
      "epoch": 0.3780808805934434,
      "grad_norm": 0.160896911699282,
      "learning_rate": 0.00015650958229034391,
      "loss": 1.093,
      "step": 395
    },
    {
      "epoch": 0.382866714525006,
      "grad_norm": 0.1539033105501165,
      "learning_rate": 0.00015512160513759672,
      "loss": 1.0824,
      "step": 400
    },
    {
      "epoch": 0.38765254845656855,
      "grad_norm": 0.15253934847352404,
      "learning_rate": 0.00015371820315800315,
      "loss": 1.0611,
      "step": 405
    },
    {
      "epoch": 0.3924383823881311,
      "grad_norm": 0.1549203336671571,
      "learning_rate": 0.00015229976906930935,
      "loss": 1.0926,
      "step": 410
    },
    {
      "epoch": 0.3972242163196937,
      "grad_norm": 0.15736586699846142,
      "learning_rate": 0.0001508666997957369,
      "loss": 1.0838,
      "step": 415
    },
    {
      "epoch": 0.4020100502512563,
      "grad_norm": 0.15414651629074486,
      "learning_rate": 0.00014941939635691035,
      "loss": 1.0962,
      "step": 420
    },
    {
      "epoch": 0.40679588418281887,
      "grad_norm": 0.15216014768555902,
      "learning_rate": 0.00014795826375563925,
      "loss": 1.0837,
      "step": 425
    },
    {
      "epoch": 0.41158171811438143,
      "grad_norm": 0.1551252486012846,
      "learning_rate": 0.0001464837108645845,
      "loss": 1.096,
      "step": 430
    },
    {
      "epoch": 0.416367552045944,
      "grad_norm": 0.15880410911617168,
      "learning_rate": 0.00014499615031184296,
      "loss": 1.0947,
      "step": 435
    },
    {
      "epoch": 0.42115338597750657,
      "grad_norm": 0.16084656769756484,
      "learning_rate": 0.00014349599836548034,
      "loss": 1.0955,
      "step": 440
    },
    {
      "epoch": 0.42593921990906913,
      "grad_norm": 0.14942909791958908,
      "learning_rate": 0.0001419836748170459,
      "loss": 1.0911,
      "step": 445
    },
    {
      "epoch": 0.43072505384063176,
      "grad_norm": 0.16134597273400678,
      "learning_rate": 0.0001404596028641009,
      "loss": 1.1136,
      "step": 450
    },
    {
      "epoch": 0.4355108877721943,
      "grad_norm": 0.15552785776756606,
      "learning_rate": 0.0001389242089917943,
      "loss": 1.1005,
      "step": 455
    },
    {
      "epoch": 0.4402967217037569,
      "grad_norm": 0.1544583591443468,
      "learning_rate": 0.00013737792285351805,
      "loss": 1.0896,
      "step": 460
    },
    {
      "epoch": 0.44508255563531945,
      "grad_norm": 0.15743294110434283,
      "learning_rate": 0.0001358211771506763,
      "loss": 1.0687,
      "step": 465
    },
    {
      "epoch": 0.449868389566882,
      "grad_norm": 0.15489693015617015,
      "learning_rate": 0.00013425440751160112,
      "loss": 1.0909,
      "step": 470
    },
    {
      "epoch": 0.4546542234984446,
      "grad_norm": 0.1556280787651109,
      "learning_rate": 0.00013267805236964967,
      "loss": 1.1008,
      "step": 475
    },
    {
      "epoch": 0.45944005743000715,
      "grad_norm": 0.16139496091159036,
      "learning_rate": 0.00013109255284051615,
      "loss": 1.1167,
      "step": 480
    },
    {
      "epoch": 0.4642258913615698,
      "grad_norm": 0.15380326887200926,
      "learning_rate": 0.00012949835259879304,
      "loss": 1.1021,
      "step": 485
    },
    {
      "epoch": 0.46901172529313234,
      "grad_norm": 0.1504710821626308,
      "learning_rate": 0.00012789589775381676,
      "loss": 1.0824,
      "step": 490
    },
    {
      "epoch": 0.4737975592246949,
      "grad_norm": 0.16882632755621252,
      "learning_rate": 0.00012628563672483146,
      "loss": 1.091,
      "step": 495
    },
    {
      "epoch": 0.4785833931562575,
      "grad_norm": 0.16236683430294702,
      "learning_rate": 0.0001246680201155068,
      "loss": 1.0609,
      "step": 500
    },
    {
      "epoch": 0.48336922708782004,
      "grad_norm": 0.1534881294655078,
      "learning_rate": 0.00012304350058784405,
      "loss": 1.0611,
      "step": 505
    },
    {
      "epoch": 0.4881550610193826,
      "grad_norm": 0.16620841316700394,
      "learning_rate": 0.00012141253273550696,
      "loss": 1.0932,
      "step": 510
    },
    {
      "epoch": 0.49294089495094523,
      "grad_norm": 0.16942714030828704,
      "learning_rate": 0.00011977557295661108,
      "loss": 1.0856,
      "step": 515
    },
    {
      "epoch": 0.4977267288825078,
      "grad_norm": 0.15500201031703087,
      "learning_rate": 0.00011813307932600887,
      "loss": 1.0852,
      "step": 520
    },
    {
      "epoch": 0.5025125628140703,
      "grad_norm": 0.15248801968172002,
      "learning_rate": 0.00011648551146710556,
      "loss": 1.1069,
      "step": 525
    },
    {
      "epoch": 0.5072983967456329,
      "grad_norm": 0.14978453385390675,
      "learning_rate": 0.0001148333304232411,
      "loss": 1.088,
      "step": 530
    },
    {
      "epoch": 0.5120842306771956,
      "grad_norm": 0.14736066147246124,
      "learning_rate": 0.00011317699852867548,
      "loss": 1.0506,
      "step": 535
    },
    {
      "epoch": 0.5168700646087581,
      "grad_norm": 0.15088998664120562,
      "learning_rate": 0.0001115169792792124,
      "loss": 1.0972,
      "step": 540
    },
    {
      "epoch": 0.5216558985403207,
      "grad_norm": 0.14676026138747209,
      "learning_rate": 0.00010985373720249801,
      "loss": 1.0871,
      "step": 545
    },
    {
      "epoch": 0.5264417324718832,
      "grad_norm": 0.17054822297185676,
      "learning_rate": 0.00010818773772803082,
      "loss": 1.0957,
      "step": 550
    },
    {
      "epoch": 0.5312275664034458,
      "grad_norm": 0.15081743477470166,
      "learning_rate": 0.0001065194470569193,
      "loss": 1.1114,
      "step": 555
    },
    {
      "epoch": 0.5360134003350083,
      "grad_norm": 0.1556600989117304,
      "learning_rate": 0.0001048493320314238,
      "loss": 1.0747,
      "step": 560
    },
    {
      "epoch": 0.540799234266571,
      "grad_norm": 0.15346464585086714,
      "learning_rate": 0.00010317786000431851,
      "loss": 1.0761,
      "step": 565
    },
    {
      "epoch": 0.5455850681981336,
      "grad_norm": 0.15178562379014646,
      "learning_rate": 0.00010150549870811107,
      "loss": 1.0839,
      "step": 570
    },
    {
      "epoch": 0.5503709021296961,
      "grad_norm": 0.15263581024104103,
      "learning_rate": 9.983271612415575e-05,
      "loss": 1.0742,
      "step": 575
    },
    {
      "epoch": 0.5551567360612587,
      "grad_norm": 0.15166582071053056,
      "learning_rate": 9.81599803516968e-05,
      "loss": 1.0725,
      "step": 580
    },
    {
      "epoch": 0.5599425699928212,
      "grad_norm": 0.14735687803417952,
      "learning_rate": 9.648775947687912e-05,
      "loss": 1.0705,
      "step": 585
    },
    {
      "epoch": 0.5647284039243838,
      "grad_norm": 0.14825818203221888,
      "learning_rate": 9.48165214417624e-05,
      "loss": 1.0871,
      "step": 590
    },
    {
      "epoch": 0.5695142378559463,
      "grad_norm": 0.15700946642781993,
      "learning_rate": 9.314673391337576e-05,
      "loss": 1.0979,
      "step": 595
    },
    {
      "epoch": 0.574300071787509,
      "grad_norm": 0.15580031067347558,
      "learning_rate": 9.147886415284903e-05,
      "loss": 1.0592,
      "step": 600
    },
    {
      "epoch": 0.5790859057190716,
      "grad_norm": 0.14548002556094225,
      "learning_rate": 8.981337888465788e-05,
      "loss": 1.0787,
      "step": 605
    },
    {
      "epoch": 0.5838717396506341,
      "grad_norm": 0.14237124600928142,
      "learning_rate": 8.815074416601913e-05,
      "loss": 1.0698,
      "step": 610
    },
    {
      "epoch": 0.5886575735821967,
      "grad_norm": 0.15304745525626437,
      "learning_rate": 8.649142525647272e-05,
      "loss": 1.0848,
      "step": 615
    },
    {
      "epoch": 0.5934434075137592,
      "grad_norm": 0.14513336716190856,
      "learning_rate": 8.48358864876867e-05,
      "loss": 1.0462,
      "step": 620
    },
    {
      "epoch": 0.5982292414453219,
      "grad_norm": 0.1468415945819683,
      "learning_rate": 8.318459113352221e-05,
      "loss": 1.0906,
      "step": 625
    },
    {
      "epoch": 0.6030150753768844,
      "grad_norm": 0.14408143553897426,
      "learning_rate": 8.153800128039441e-05,
      "loss": 1.085,
      "step": 630
    },
    {
      "epoch": 0.607800909308447,
      "grad_norm": 0.15046217184291616,
      "learning_rate": 7.989657769796533e-05,
      "loss": 1.0882,
      "step": 635
    },
    {
      "epoch": 0.6125867432400096,
      "grad_norm": 0.14348283659906289,
      "learning_rate": 7.82607797102056e-05,
      "loss": 1.0861,
      "step": 640
    },
    {
      "epoch": 0.6173725771715721,
      "grad_norm": 0.14685503152106738,
      "learning_rate": 7.663106506686057e-05,
      "loss": 1.1003,
      "step": 645
    },
    {
      "epoch": 0.6221584111031347,
      "grad_norm": 0.1480277391784376,
      "learning_rate": 7.500788981535708e-05,
      "loss": 1.0758,
      "step": 650
    },
    {
      "epoch": 0.6269442450346973,
      "grad_norm": 0.1477910922274185,
      "learning_rate": 7.339170817318625e-05,
      "loss": 1.0695,
      "step": 655
    },
    {
      "epoch": 0.6317300789662599,
      "grad_norm": 0.1551465349289344,
      "learning_rate": 7.178297240079882e-05,
      "loss": 1.0942,
      "step": 660
    },
    {
      "epoch": 0.6365159128978225,
      "grad_norm": 0.148811465121087,
      "learning_rate": 7.018213267504775e-05,
      "loss": 1.0825,
      "step": 665
    },
    {
      "epoch": 0.641301746829385,
      "grad_norm": 0.146937156337137,
      "learning_rate": 6.858963696321403e-05,
      "loss": 1.0985,
      "step": 670
    },
    {
      "epoch": 0.6460875807609476,
      "grad_norm": 0.14703161191479286,
      "learning_rate": 6.700593089765086e-05,
      "loss": 1.06,
      "step": 675
    },
    {
      "epoch": 0.6508734146925101,
      "grad_norm": 0.14564360148371303,
      "learning_rate": 6.543145765108106e-05,
      "loss": 1.0853,
      "step": 680
    },
    {
      "epoch": 0.6556592486240728,
      "grad_norm": 0.14887365645849163,
      "learning_rate": 6.3866657812583e-05,
      "loss": 1.0787,
      "step": 685
    },
    {
      "epoch": 0.6604450825556353,
      "grad_norm": 0.14533659914404762,
      "learning_rate": 6.231196926429913e-05,
      "loss": 1.073,
      "step": 690
    },
    {
      "epoch": 0.6652309164871979,
      "grad_norm": 0.2354314895944445,
      "learning_rate": 6.076782705890257e-05,
      "loss": 1.0815,
      "step": 695
    },
    {
      "epoch": 0.6700167504187605,
      "grad_norm": 0.14132233475416703,
      "learning_rate": 5.9234663297854876e-05,
      "loss": 1.0555,
      "step": 700
    },
    {
      "epoch": 0.674802584350323,
      "grad_norm": 0.14913316600220797,
      "learning_rate": 5.7712907010490036e-05,
      "loss": 1.0785,
      "step": 705
    },
    {
      "epoch": 0.6795884182818857,
      "grad_norm": 0.15328072297180578,
      "learning_rate": 5.620298403395805e-05,
      "loss": 1.0857,
      "step": 710
    },
    {
      "epoch": 0.6843742522134482,
      "grad_norm": 0.17603388258774993,
      "learning_rate": 5.4705316894061765e-05,
      "loss": 1.0898,
      "step": 715
    },
    {
      "epoch": 0.6891600861450108,
      "grad_norm": 0.1448443355064005,
      "learning_rate": 5.322032468702036e-05,
      "loss": 1.0714,
      "step": 720
    },
    {
      "epoch": 0.6939459200765733,
      "grad_norm": 0.4624474555190123,
      "learning_rate": 5.1748422962192376e-05,
      "loss": 1.0994,
      "step": 725
    },
    {
      "epoch": 0.6987317540081359,
      "grad_norm": 0.14868980834848183,
      "learning_rate": 5.0290023605791666e-05,
      "loss": 1.0725,
      "step": 730
    },
    {
      "epoch": 0.7035175879396985,
      "grad_norm": 0.15278504704361137,
      "learning_rate": 4.8845534725628086e-05,
      "loss": 1.0962,
      "step": 735
    },
    {
      "epoch": 0.708303421871261,
      "grad_norm": 0.14605679246576617,
      "learning_rate": 4.741536053690552e-05,
      "loss": 1.0947,
      "step": 740
    },
    {
      "epoch": 0.7130892558028237,
      "grad_norm": 0.172204603811799,
      "learning_rate": 4.599990124910918e-05,
      "loss": 1.0758,
      "step": 745
    },
    {
      "epoch": 0.7178750897343862,
      "grad_norm": 0.14357849865669614,
      "learning_rate": 4.4599552954014145e-05,
      "loss": 1.0682,
      "step": 750
    },
    {
      "epoch": 0.7226609236659488,
      "grad_norm": 0.14980923833672957,
      "learning_rate": 4.32147075148458e-05,
      "loss": 1.0814,
      "step": 755
    },
    {
      "epoch": 0.7274467575975113,
      "grad_norm": 0.16395768222951593,
      "learning_rate": 4.1845752456623665e-05,
      "loss": 1.0583,
      "step": 760
    },
    {
      "epoch": 0.7322325915290739,
      "grad_norm": 0.14059821304657993,
      "learning_rate": 4.049307085771931e-05,
      "loss": 1.0839,
      "step": 765
    },
    {
      "epoch": 0.7370184254606366,
      "grad_norm": 0.1472110334031576,
      "learning_rate": 3.9157041242658477e-05,
      "loss": 1.1079,
      "step": 770
    },
    {
      "epoch": 0.7418042593921991,
      "grad_norm": 0.14020342123522012,
      "learning_rate": 3.783803747619741e-05,
      "loss": 1.0829,
      "step": 775
    },
    {
      "epoch": 0.7465900933237617,
      "grad_norm": 0.17437047699695307,
      "learning_rate": 3.653642865870359e-05,
      "loss": 1.0808,
      "step": 780
    },
    {
      "epoch": 0.7513759272553242,
      "grad_norm": 0.14320013892049976,
      "learning_rate": 3.525257902286908e-05,
      "loss": 1.0608,
      "step": 785
    },
    {
      "epoch": 0.7561617611868868,
      "grad_norm": 0.14437417000631428,
      "learning_rate": 3.398684783178648e-05,
      "loss": 1.0618,
      "step": 790
    },
    {
      "epoch": 0.7609475951184493,
      "grad_norm": 0.14321363672597254,
      "learning_rate": 3.273958927841525e-05,
      "loss": 1.0659,
      "step": 795
    },
    {
      "epoch": 0.765733429050012,
      "grad_norm": 0.14121990349576288,
      "learning_rate": 3.1511152386467055e-05,
      "loss": 1.0936,
      "step": 800
    },
    {
      "epoch": 0.7705192629815746,
      "grad_norm": 0.16146069783583863,
      "learning_rate": 3.0301880912737568e-05,
      "loss": 1.0647,
      "step": 805
    },
    {
      "epoch": 0.7753050969131371,
      "grad_norm": 0.1447026626027737,
      "learning_rate": 2.9112113250911844e-05,
      "loss": 1.0747,
      "step": 810
    },
    {
      "epoch": 0.7800909308446997,
      "grad_norm": 0.14724228311552523,
      "learning_rate": 2.7942182336870925e-05,
      "loss": 1.1046,
      "step": 815
    },
    {
      "epoch": 0.7848767647762622,
      "grad_norm": 0.14612792897080507,
      "learning_rate": 2.6792415555525463e-05,
      "loss": 1.0391,
      "step": 820
    },
    {
      "epoch": 0.7896625987078248,
      "grad_norm": 0.14445016139434405,
      "learning_rate": 2.5663134649202647e-05,
      "loss": 1.0808,
      "step": 825
    },
    {
      "epoch": 0.7944484326393874,
      "grad_norm": 0.14283033243615206,
      "learning_rate": 2.4554655627612245e-05,
      "loss": 1.0767,
      "step": 830
    },
    {
      "epoch": 0.79923426657095,
      "grad_norm": 0.1428104588189023,
      "learning_rate": 2.34672886794167e-05,
      "loss": 1.0884,
      "step": 835
    },
    {
      "epoch": 0.8040201005025126,
      "grad_norm": 0.14106416222944104,
      "learning_rate": 2.2401338085430323e-05,
      "loss": 1.0891,
      "step": 840
    },
    {
      "epoch": 0.8088059344340751,
      "grad_norm": 0.14453431354715718,
      "learning_rate": 2.135710213347134e-05,
      "loss": 1.0829,
      "step": 845
    },
    {
      "epoch": 0.8135917683656377,
      "grad_norm": 0.1436138017414945,
      "learning_rate": 2.0334873034891554e-05,
      "loss": 1.0823,
      "step": 850
    },
    {
      "epoch": 0.8183776022972002,
      "grad_norm": 0.14415504753616376,
      "learning_rate": 1.933493684280574e-05,
      "loss": 1.0749,
      "step": 855
    },
    {
      "epoch": 0.8231634362287629,
      "grad_norm": 0.14188286670890893,
      "learning_rate": 1.8357573372044834e-05,
      "loss": 1.0775,
      "step": 860
    },
    {
      "epoch": 0.8279492701603255,
      "grad_norm": 0.14043422592547342,
      "learning_rate": 1.740305612085439e-05,
      "loss": 1.0852,
      "step": 865
    },
    {
      "epoch": 0.832735104091888,
      "grad_norm": 0.14014109535516273,
      "learning_rate": 1.647165219436113e-05,
      "loss": 1.0716,
      "step": 870
    },
    {
      "epoch": 0.8375209380234506,
      "grad_norm": 0.18266681120520475,
      "learning_rate": 1.556362222982799e-05,
      "loss": 1.0711,
      "step": 875
    },
    {
      "epoch": 0.8423067719550131,
      "grad_norm": 0.14585487433506303,
      "learning_rate": 1.4679220323719234e-05,
      "loss": 1.0561,
      "step": 880
    },
    {
      "epoch": 0.8470926058865758,
      "grad_norm": 0.13911103035630754,
      "learning_rate": 1.3818693960596185e-05,
      "loss": 1.0707,
      "step": 885
    },
    {
      "epoch": 0.8518784398181383,
      "grad_norm": 0.15612123605821987,
      "learning_rate": 1.2982283943862738e-05,
      "loss": 1.0494,
      "step": 890
    },
    {
      "epoch": 0.8566642737497009,
      "grad_norm": 0.14067555622023134,
      "learning_rate": 1.217022432838093e-05,
      "loss": 1.0686,
      "step": 895
    },
    {
      "epoch": 0.8614501076812635,
      "grad_norm": 0.1457410414761679,
      "learning_rate": 1.1382742354974429e-05,
      "loss": 1.0562,
      "step": 900
    },
    {
      "epoch": 0.866235941612826,
      "grad_norm": 0.1398250627278749,
      "learning_rate": 1.0620058386839393e-05,
      "loss": 1.0753,
      "step": 905
    },
    {
      "epoch": 0.8710217755443886,
      "grad_norm": 0.14690238478434312,
      "learning_rate": 9.882385847879539e-06,
      "loss": 1.0539,
      "step": 910
    },
    {
      "epoch": 0.8758076094759512,
      "grad_norm": 0.14224902345010998,
      "learning_rate": 9.169931162983137e-06,
      "loss": 1.0575,
      "step": 915
    },
    {
      "epoch": 0.8805934434075138,
      "grad_norm": 0.14002967562121116,
      "learning_rate": 8.482893700258643e-06,
      "loss": 1.0831,
      "step": 920
    },
    {
      "epoch": 0.8853792773390763,
      "grad_norm": 0.14652920530592364,
      "learning_rate": 7.821465715244947e-06,
      "loss": 1.0844,
      "step": 925
    },
    {
      "epoch": 0.8901651112706389,
      "grad_norm": 0.13985808750925746,
      "learning_rate": 7.185832297111938e-06,
      "loss": 1.0618,
      "step": 930
    },
    {
      "epoch": 0.8949509452022015,
      "grad_norm": 0.15160308510490375,
      "learning_rate": 6.576171316866608e-06,
      "loss": 1.0773,
      "step": 935
    },
    {
      "epoch": 0.899736779133764,
      "grad_norm": 0.14784429409642344,
      "learning_rate": 5.9926533775789055e-06,
      "loss": 1.0951,
      "step": 940
    },
    {
      "epoch": 0.9045226130653267,
      "grad_norm": 0.14167088318411009,
      "learning_rate": 5.435441766641369e-06,
      "loss": 1.0841,
      "step": 945
    },
    {
      "epoch": 0.9093084469968892,
      "grad_norm": 0.14256818695069146,
      "learning_rate": 4.904692410075973e-06,
      "loss": 1.0647,
      "step": 950
    },
    {
      "epoch": 0.9140942809284518,
      "grad_norm": 0.15531748633710526,
      "learning_rate": 4.400553828900989e-06,
      "loss": 1.0757,
      "step": 955
    },
    {
      "epoch": 0.9188801148600143,
      "grad_norm": 0.14420681549864126,
      "learning_rate": 3.923167097569935e-06,
      "loss": 1.0903,
      "step": 960
    },
    {
      "epoch": 0.9236659487915769,
      "grad_norm": 0.14398010788396462,
      "learning_rate": 3.4726658044943126e-06,
      "loss": 1.0668,
      "step": 965
    },
    {
      "epoch": 0.9284517827231396,
      "grad_norm": 0.14589900176146645,
      "learning_rate": 3.0491760146611926e-06,
      "loss": 1.0845,
      "step": 970
    },
    {
      "epoch": 0.9332376166547021,
      "grad_norm": 0.13882750982702796,
      "learning_rate": 2.652816234356159e-06,
      "loss": 1.0382,
      "step": 975
    },
    {
      "epoch": 0.9380234505862647,
      "grad_norm": 0.14112035905216325,
      "learning_rate": 2.283697378001315e-06,
      "loss": 1.0825,
      "step": 980
    },
    {
      "epoch": 0.9428092845178272,
      "grad_norm": 0.13934480624047157,
      "learning_rate": 1.9419227371178627e-06,
      "loss": 1.0679,
      "step": 985
    },
    {
      "epoch": 0.9475951184493898,
      "grad_norm": 0.14117739445269173,
      "learning_rate": 1.6275879514217052e-06,
      "loss": 1.0772,
      "step": 990
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 0.14031209854381504,
      "learning_rate": 1.3407809820603856e-06,
      "loss": 1.0767,
      "step": 995
    },
    {
      "epoch": 0.957166786312515,
      "grad_norm": 0.14091355128035063,
      "learning_rate": 1.0815820869985893e-06,
      "loss": 1.0635,
      "step": 1000
    },
    {
      "epoch": 0.9619526202440776,
      "grad_norm": 0.14100298765660577,
      "learning_rate": 8.50063798559475e-07,
      "loss": 1.0861,
      "step": 1005
    },
    {
      "epoch": 0.9667384541756401,
      "grad_norm": 0.1412224316948679,
      "learning_rate": 6.462909031276443e-07,
      "loss": 1.0633,
      "step": 1010
    },
    {
      "epoch": 0.9715242881072027,
      "grad_norm": 0.1385906964183353,
      "learning_rate": 4.7032042301985434e-07,
      "loss": 1.0726,
      "step": 1015
    },
    {
      "epoch": 0.9763101220387652,
      "grad_norm": 0.151976410727331,
      "learning_rate": 3.222016005282824e-07,
      "loss": 1.0645,
      "step": 1020
    },
    {
      "epoch": 0.9810959559703278,
      "grad_norm": 0.14304003914264313,
      "learning_rate": 2.0197588414094804e-07,
      "loss": 1.0785,
      "step": 1025
    },
    {
      "epoch": 0.9858817899018905,
      "grad_norm": 0.1395525833943131,
      "learning_rate": 1.0967691694302451e-07,
      "loss": 1.0582,
      "step": 1030
    },
    {
      "epoch": 0.990667623833453,
      "grad_norm": 0.14208441339069042,
      "learning_rate": 4.5330527202480654e-08,
      "loss": 1.0763,
      "step": 1035
    },
    {
      "epoch": 0.9954534577650156,
      "grad_norm": 0.13931375530799342,
      "learning_rate": 8.95472114241791e-09,
      "loss": 1.0444,
      "step": 1040
    },
    {
      "epoch": 0.9992821249102656,
      "eval_loss": 1.077100157737732,
      "eval_runtime": 3923.6787,
      "eval_samples_per_second": 3.43,
      "eval_steps_per_second": 0.858,
      "step": 1044
    },
    {
      "epoch": 0.9992821249102656,
      "step": 1044,
      "total_flos": 2155604625850368.0,
      "train_loss": 1.091635976486279,
      "train_runtime": 24351.9745,
      "train_samples_per_second": 2.746,
      "train_steps_per_second": 0.043
    }
  ],
  "logging_steps": 5,
  "max_steps": 1044,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2155604625850368.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}
|
|