|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 6190, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01615508885298869, |
|
"grad_norm": 13.517727851867676, |
|
"learning_rate": 2.903225806451613e-06, |
|
"loss": 1.0539, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03231017770597738, |
|
"grad_norm": 2.3650765419006348, |
|
"learning_rate": 6.129032258064516e-06, |
|
"loss": 0.5986, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.048465266558966075, |
|
"grad_norm": 3.7127814292907715, |
|
"learning_rate": 9.35483870967742e-06, |
|
"loss": 0.325, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06462035541195477, |
|
"grad_norm": 2.470418691635132, |
|
"learning_rate": 1.2580645161290322e-05, |
|
"loss": 0.2792, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08077544426494346, |
|
"grad_norm": 1.4894506931304932, |
|
"learning_rate": 1.5806451612903226e-05, |
|
"loss": 0.2738, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09693053311793215, |
|
"grad_norm": 1.6425580978393555, |
|
"learning_rate": 1.9032258064516127e-05, |
|
"loss": 0.2348, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.11308562197092084, |
|
"grad_norm": 9.821566581726074, |
|
"learning_rate": 2.2258064516129034e-05, |
|
"loss": 0.2049, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12924071082390953, |
|
"grad_norm": 2.8060154914855957, |
|
"learning_rate": 2.5483870967741935e-05, |
|
"loss": 0.2037, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.14539579967689822, |
|
"grad_norm": 1.5872341394424438, |
|
"learning_rate": 2.8709677419354843e-05, |
|
"loss": 0.1972, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.16155088852988692, |
|
"grad_norm": 5.6780219078063965, |
|
"learning_rate": 3.193548387096774e-05, |
|
"loss": 0.1868, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1777059773828756, |
|
"grad_norm": 0.8519928455352783, |
|
"learning_rate": 3.516129032258065e-05, |
|
"loss": 0.1799, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1938610662358643, |
|
"grad_norm": 0.983458399772644, |
|
"learning_rate": 3.838709677419355e-05, |
|
"loss": 0.1686, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.210016155088853, |
|
"grad_norm": 1.8987292051315308, |
|
"learning_rate": 4.161290322580645e-05, |
|
"loss": 0.1778, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.22617124394184168, |
|
"grad_norm": 1.6562193632125854, |
|
"learning_rate": 4.4838709677419356e-05, |
|
"loss": 0.1735, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.24232633279483037, |
|
"grad_norm": 1.770867109298706, |
|
"learning_rate": 4.806451612903226e-05, |
|
"loss": 0.171, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.25848142164781907, |
|
"grad_norm": 1.1404958963394165, |
|
"learning_rate": 5.1290322580645164e-05, |
|
"loss": 0.1575, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.27463651050080773, |
|
"grad_norm": 1.507441759109497, |
|
"learning_rate": 5.451612903225807e-05, |
|
"loss": 0.1526, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.29079159935379645, |
|
"grad_norm": 1.0781203508377075, |
|
"learning_rate": 5.7741935483870965e-05, |
|
"loss": 0.1508, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3069466882067851, |
|
"grad_norm": 1.5736271142959595, |
|
"learning_rate": 6.096774193548387e-05, |
|
"loss": 0.1368, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.32310177705977383, |
|
"grad_norm": 1.4114209413528442, |
|
"learning_rate": 6.419354838709679e-05, |
|
"loss": 0.1326, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3392568659127625, |
|
"grad_norm": 1.3289586305618286, |
|
"learning_rate": 6.741935483870968e-05, |
|
"loss": 0.1338, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.3554119547657512, |
|
"grad_norm": 0.8117440342903137, |
|
"learning_rate": 7.064516129032258e-05, |
|
"loss": 0.1326, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3715670436187399, |
|
"grad_norm": 1.1739834547042847, |
|
"learning_rate": 7.387096774193549e-05, |
|
"loss": 0.1102, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3877221324717286, |
|
"grad_norm": 1.4124845266342163, |
|
"learning_rate": 7.709677419354839e-05, |
|
"loss": 0.1204, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.40387722132471726, |
|
"grad_norm": 0.8694249987602234, |
|
"learning_rate": 8.03225806451613e-05, |
|
"loss": 0.1075, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.420032310177706, |
|
"grad_norm": 1.367783546447754, |
|
"learning_rate": 8.35483870967742e-05, |
|
"loss": 0.1086, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.43618739903069464, |
|
"grad_norm": 1.4387221336364746, |
|
"learning_rate": 8.677419354838711e-05, |
|
"loss": 0.1285, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.45234248788368336, |
|
"grad_norm": 0.9508649706840515, |
|
"learning_rate": 9e-05, |
|
"loss": 0.1077, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.46849757673667203, |
|
"grad_norm": 0.9184303283691406, |
|
"learning_rate": 9.32258064516129e-05, |
|
"loss": 0.1263, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.48465266558966075, |
|
"grad_norm": 1.1463005542755127, |
|
"learning_rate": 9.645161290322581e-05, |
|
"loss": 0.1027, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5008077544426495, |
|
"grad_norm": 1.938699722290039, |
|
"learning_rate": 9.967741935483872e-05, |
|
"loss": 0.1061, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5169628432956381, |
|
"grad_norm": 0.9912849068641663, |
|
"learning_rate": 9.999942194483773e-05, |
|
"loss": 0.1036, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5331179321486268, |
|
"grad_norm": 1.1873068809509277, |
|
"learning_rate": 9.999742374662181e-05, |
|
"loss": 0.0954, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5492730210016155, |
|
"grad_norm": 1.0425370931625366, |
|
"learning_rate": 9.999399832589556e-05, |
|
"loss": 0.0923, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5654281098546042, |
|
"grad_norm": 1.1135231256484985, |
|
"learning_rate": 9.998914578044079e-05, |
|
"loss": 0.0958, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5815831987075929, |
|
"grad_norm": 0.9654638767242432, |
|
"learning_rate": 9.998286624877786e-05, |
|
"loss": 0.1026, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5977382875605816, |
|
"grad_norm": 1.106973648071289, |
|
"learning_rate": 9.99751599101618e-05, |
|
"loss": 0.0945, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6138933764135702, |
|
"grad_norm": 1.0972684621810913, |
|
"learning_rate": 9.996602698457715e-05, |
|
"loss": 0.0857, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.630048465266559, |
|
"grad_norm": 0.9330363869667053, |
|
"learning_rate": 9.995546773273166e-05, |
|
"loss": 0.0908, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6462035541195477, |
|
"grad_norm": 0.9228382706642151, |
|
"learning_rate": 9.994348245604892e-05, |
|
"loss": 0.0929, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6623586429725363, |
|
"grad_norm": 1.4199814796447754, |
|
"learning_rate": 9.993007149665967e-05, |
|
"loss": 0.1023, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.678513731825525, |
|
"grad_norm": 1.0425035953521729, |
|
"learning_rate": 9.991523523739211e-05, |
|
"loss": 0.0924, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6946688206785138, |
|
"grad_norm": 0.9444074034690857, |
|
"learning_rate": 9.989897410176093e-05, |
|
"loss": 0.0961, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7108239095315024, |
|
"grad_norm": 0.8055535554885864, |
|
"learning_rate": 9.988128855395523e-05, |
|
"loss": 0.0891, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7269789983844911, |
|
"grad_norm": 1.0856647491455078, |
|
"learning_rate": 9.986217909882522e-05, |
|
"loss": 0.0849, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7431340872374798, |
|
"grad_norm": 0.8828626275062561, |
|
"learning_rate": 9.984164628186796e-05, |
|
"loss": 0.0893, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7592891760904685, |
|
"grad_norm": 0.7011072039604187, |
|
"learning_rate": 9.981969068921158e-05, |
|
"loss": 0.0951, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.7754442649434572, |
|
"grad_norm": 0.6536422967910767, |
|
"learning_rate": 9.979631294759871e-05, |
|
"loss": 0.0805, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.7915993537964459, |
|
"grad_norm": 0.6991639733314514, |
|
"learning_rate": 9.97715137243685e-05, |
|
"loss": 0.0809, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8077544426494345, |
|
"grad_norm": 0.9698547124862671, |
|
"learning_rate": 9.974529372743761e-05, |
|
"loss": 0.0875, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8239095315024233, |
|
"grad_norm": 0.7342029809951782, |
|
"learning_rate": 9.971765370528006e-05, |
|
"loss": 0.0821, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.840064620355412, |
|
"grad_norm": 0.5005660057067871, |
|
"learning_rate": 9.968859444690567e-05, |
|
"loss": 0.0748, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.8562197092084006, |
|
"grad_norm": 0.5115198493003845, |
|
"learning_rate": 9.965811678183777e-05, |
|
"loss": 0.0804, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.8723747980613893, |
|
"grad_norm": 0.7139051556587219, |
|
"learning_rate": 9.962622158008938e-05, |
|
"loss": 0.0686, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.8885298869143781, |
|
"grad_norm": 0.5260514616966248, |
|
"learning_rate": 9.959290975213841e-05, |
|
"loss": 0.0831, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9046849757673667, |
|
"grad_norm": 0.5752175450325012, |
|
"learning_rate": 9.955818224890165e-05, |
|
"loss": 0.0656, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9208400646203554, |
|
"grad_norm": 0.6161171197891235, |
|
"learning_rate": 9.952204006170771e-05, |
|
"loss": 0.0697, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.9369951534733441, |
|
"grad_norm": 0.935058057308197, |
|
"learning_rate": 9.948448422226856e-05, |
|
"loss": 0.0774, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9531502423263328, |
|
"grad_norm": 1.006998062133789, |
|
"learning_rate": 9.944551580265026e-05, |
|
"loss": 0.0788, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.9693053311793215, |
|
"grad_norm": 0.9937463998794556, |
|
"learning_rate": 9.940513591524222e-05, |
|
"loss": 0.075, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9854604200323102, |
|
"grad_norm": 0.840084433555603, |
|
"learning_rate": 9.936334571272554e-05, |
|
"loss": 0.0805, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.001615508885299, |
|
"grad_norm": 0.9836556315422058, |
|
"learning_rate": 9.932014638804001e-05, |
|
"loss": 0.0753, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.0177705977382876, |
|
"grad_norm": 0.7406233549118042, |
|
"learning_rate": 9.927553917435017e-05, |
|
"loss": 0.0695, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.0339256865912763, |
|
"grad_norm": 0.8061002492904663, |
|
"learning_rate": 9.922952534501002e-05, |
|
"loss": 0.0682, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.050080775444265, |
|
"grad_norm": 0.6358613967895508, |
|
"learning_rate": 9.918210621352668e-05, |
|
"loss": 0.077, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.0662358642972536, |
|
"grad_norm": 0.6549187898635864, |
|
"learning_rate": 9.913328313352292e-05, |
|
"loss": 0.0739, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.0823909531502423, |
|
"grad_norm": 0.8390158414840698, |
|
"learning_rate": 9.908305749869858e-05, |
|
"loss": 0.0883, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.098546042003231, |
|
"grad_norm": 0.942304253578186, |
|
"learning_rate": 9.90314307427906e-05, |
|
"loss": 0.0788, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.1147011308562198, |
|
"grad_norm": 1.1538914442062378, |
|
"learning_rate": 9.897840433953234e-05, |
|
"loss": 0.0766, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.1308562197092085, |
|
"grad_norm": 0.6114380359649658, |
|
"learning_rate": 9.892397980261128e-05, |
|
"loss": 0.0754, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1470113085621971, |
|
"grad_norm": 0.9622769355773926, |
|
"learning_rate": 9.886815868562596e-05, |
|
"loss": 0.0824, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.1631663974151858, |
|
"grad_norm": 0.6100155115127563, |
|
"learning_rate": 9.88109425820416e-05, |
|
"loss": 0.067, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.1793214862681745, |
|
"grad_norm": 0.5996105670928955, |
|
"learning_rate": 9.875233312514454e-05, |
|
"loss": 0.0663, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.1954765751211631, |
|
"grad_norm": 0.5155414342880249, |
|
"learning_rate": 9.869233198799572e-05, |
|
"loss": 0.0629, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.2116316639741518, |
|
"grad_norm": 0.6942029595375061, |
|
"learning_rate": 9.863094088338288e-05, |
|
"loss": 0.0764, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.2277867528271407, |
|
"grad_norm": 0.5376043319702148, |
|
"learning_rate": 9.856816156377163e-05, |
|
"loss": 0.0675, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.2439418416801293, |
|
"grad_norm": 0.6686906814575195, |
|
"learning_rate": 9.850399582125548e-05, |
|
"loss": 0.0767, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.260096930533118, |
|
"grad_norm": 0.768054723739624, |
|
"learning_rate": 9.843844548750464e-05, |
|
"loss": 0.0716, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.2762520193861067, |
|
"grad_norm": 0.6787708401679993, |
|
"learning_rate": 9.837151243371376e-05, |
|
"loss": 0.0672, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.2924071082390953, |
|
"grad_norm": 0.500952959060669, |
|
"learning_rate": 9.830319857054852e-05, |
|
"loss": 0.0702, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.308562197092084, |
|
"grad_norm": 0.6068538427352905, |
|
"learning_rate": 9.823350584809105e-05, |
|
"loss": 0.0738, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.3247172859450727, |
|
"grad_norm": 0.6218283176422119, |
|
"learning_rate": 9.816243625578432e-05, |
|
"loss": 0.0756, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.3408723747980613, |
|
"grad_norm": 0.7377462983131409, |
|
"learning_rate": 9.808999182237528e-05, |
|
"loss": 0.0692, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.35702746365105, |
|
"grad_norm": 0.5580537915229797, |
|
"learning_rate": 9.8016174615857e-05, |
|
"loss": 0.0633, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.3731825525040389, |
|
"grad_norm": 0.615639328956604, |
|
"learning_rate": 9.794098674340965e-05, |
|
"loss": 0.0718, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.3893376413570275, |
|
"grad_norm": 0.7121309041976929, |
|
"learning_rate": 9.78644303513403e-05, |
|
"loss": 0.0633, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.4054927302100162, |
|
"grad_norm": 0.5688542127609253, |
|
"learning_rate": 9.778650762502166e-05, |
|
"loss": 0.0678, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.4216478190630049, |
|
"grad_norm": 0.5155729651451111, |
|
"learning_rate": 9.770722078882973e-05, |
|
"loss": 0.0665, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.4378029079159935, |
|
"grad_norm": 0.48947158455848694, |
|
"learning_rate": 9.762657210608029e-05, |
|
"loss": 0.0657, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.4539579967689822, |
|
"grad_norm": 0.7648037075996399, |
|
"learning_rate": 9.754456387896422e-05, |
|
"loss": 0.0707, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.4701130856219708, |
|
"grad_norm": 0.8872023224830627, |
|
"learning_rate": 9.746119844848195e-05, |
|
"loss": 0.062, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.4862681744749597, |
|
"grad_norm": 1.083450436592102, |
|
"learning_rate": 9.737647819437645e-05, |
|
"loss": 0.0728, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.5024232633279482, |
|
"grad_norm": 0.6818684339523315, |
|
"learning_rate": 9.729040553506539e-05, |
|
"loss": 0.0637, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.518578352180937, |
|
"grad_norm": 0.7897723913192749, |
|
"learning_rate": 9.720298292757215e-05, |
|
"loss": 0.0682, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.5347334410339257, |
|
"grad_norm": 0.46110132336616516, |
|
"learning_rate": 9.711421286745555e-05, |
|
"loss": 0.0726, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.5508885298869144, |
|
"grad_norm": 0.4637523293495178, |
|
"learning_rate": 9.70240978887387e-05, |
|
"loss": 0.0622, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.567043618739903, |
|
"grad_norm": 0.7092505693435669, |
|
"learning_rate": 9.69326405638367e-05, |
|
"loss": 0.0592, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.5831987075928917, |
|
"grad_norm": 0.5965023040771484, |
|
"learning_rate": 9.683984350348312e-05, |
|
"loss": 0.0697, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.5993537964458806, |
|
"grad_norm": 0.5201593041419983, |
|
"learning_rate": 9.67457093566555e-05, |
|
"loss": 0.0706, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.615508885298869, |
|
"grad_norm": 0.6693015098571777, |
|
"learning_rate": 9.665024081049977e-05, |
|
"loss": 0.0653, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.631663974151858, |
|
"grad_norm": 0.7377516627311707, |
|
"learning_rate": 9.655344059025351e-05, |
|
"loss": 0.061, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.6478190630048464, |
|
"grad_norm": 0.6737310290336609, |
|
"learning_rate": 9.645531145916817e-05, |
|
"loss": 0.0552, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.6639741518578353, |
|
"grad_norm": 0.6933907866477966, |
|
"learning_rate": 9.635585621843018e-05, |
|
"loss": 0.0671, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.680129240710824, |
|
"grad_norm": 0.6938374638557434, |
|
"learning_rate": 9.625507770708097e-05, |
|
"loss": 0.068, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.6962843295638126, |
|
"grad_norm": 0.5320965051651001, |
|
"learning_rate": 9.615297880193598e-05, |
|
"loss": 0.0632, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.7124394184168013, |
|
"grad_norm": 0.6312500238418579, |
|
"learning_rate": 9.60495624175025e-05, |
|
"loss": 0.0706, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.72859450726979, |
|
"grad_norm": 0.5120170712471008, |
|
"learning_rate": 9.594483150589646e-05, |
|
"loss": 0.0706, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.7447495961227788, |
|
"grad_norm": 0.6575292348861694, |
|
"learning_rate": 9.58387890567582e-05, |
|
"loss": 0.066, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.7609046849757672, |
|
"grad_norm": 0.8916189670562744, |
|
"learning_rate": 9.573143809716711e-05, |
|
"loss": 0.0572, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.7770597738287561, |
|
"grad_norm": 0.7182980179786682, |
|
"learning_rate": 9.562278169155518e-05, |
|
"loss": 0.061, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.7932148626817448, |
|
"grad_norm": 0.5273639559745789, |
|
"learning_rate": 9.551282294161962e-05, |
|
"loss": 0.0564, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.8093699515347335, |
|
"grad_norm": 0.5014919638633728, |
|
"learning_rate": 9.540156498623418e-05, |
|
"loss": 0.0674, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.8255250403877221, |
|
"grad_norm": 0.49997884035110474, |
|
"learning_rate": 9.528901100135971e-05, |
|
"loss": 0.0719, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.8416801292407108, |
|
"grad_norm": 0.5391654968261719, |
|
"learning_rate": 9.517516419995335e-05, |
|
"loss": 0.0634, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.8578352180936997, |
|
"grad_norm": 0.5763863921165466, |
|
"learning_rate": 9.506002783187691e-05, |
|
"loss": 0.0622, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.8739903069466881, |
|
"grad_norm": 0.5951936841011047, |
|
"learning_rate": 9.494360518380405e-05, |
|
"loss": 0.066, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.890145395799677, |
|
"grad_norm": 0.7027397751808167, |
|
"learning_rate": 9.482589957912651e-05, |
|
"loss": 0.0623, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.9063004846526654, |
|
"grad_norm": 0.6542057991027832, |
|
"learning_rate": 9.470691437785918e-05, |
|
"loss": 0.0635, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.9224555735056543, |
|
"grad_norm": 0.39720842242240906, |
|
"learning_rate": 9.45866529765442e-05, |
|
"loss": 0.064, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.938610662358643, |
|
"grad_norm": 0.47394442558288574, |
|
"learning_rate": 9.446511880815407e-05, |
|
"loss": 0.0595, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.9547657512116317, |
|
"grad_norm": 0.3531631529331207, |
|
"learning_rate": 9.434231534199356e-05, |
|
"loss": 0.0583, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.9709208400646203, |
|
"grad_norm": 0.6005557775497437, |
|
"learning_rate": 9.421824608360068e-05, |
|
"loss": 0.0599, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.987075928917609, |
|
"grad_norm": 0.5101392269134521, |
|
"learning_rate": 9.409291457464672e-05, |
|
"loss": 0.0617, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.003231017770598, |
|
"grad_norm": 0.42682531476020813, |
|
"learning_rate": 9.396632439283501e-05, |
|
"loss": 0.0554, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.0193861066235863, |
|
"grad_norm": 0.6450132727622986, |
|
"learning_rate": 9.383847915179892e-05, |
|
"loss": 0.0677, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.035541195476575, |
|
"grad_norm": 0.399069607257843, |
|
"learning_rate": 9.370938250099857e-05, |
|
"loss": 0.0618, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.0516962843295636, |
|
"grad_norm": 0.4468577802181244, |
|
"learning_rate": 9.357903812561679e-05, |
|
"loss": 0.0685, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.0678513731825525, |
|
"grad_norm": 0.559262752532959, |
|
"learning_rate": 9.344744974645381e-05, |
|
"loss": 0.0637, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.0840064620355414, |
|
"grad_norm": 0.5825755596160889, |
|
"learning_rate": 9.33146211198211e-05, |
|
"loss": 0.0625, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.10016155088853, |
|
"grad_norm": 1.090774655342102, |
|
"learning_rate": 9.318055603743418e-05, |
|
"loss": 0.0691, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.1163166397415187, |
|
"grad_norm": 0.3790472149848938, |
|
"learning_rate": 9.304525832630426e-05, |
|
"loss": 0.0572, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.132471728594507, |
|
"grad_norm": 0.46555569767951965, |
|
"learning_rate": 9.290873184862917e-05, |
|
"loss": 0.0611, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.148626817447496, |
|
"grad_norm": 0.5333315134048462, |
|
"learning_rate": 9.277098050168293e-05, |
|
"loss": 0.0554, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.1647819063004845, |
|
"grad_norm": 0.5820637345314026, |
|
"learning_rate": 9.263200821770461e-05, |
|
"loss": 0.0593, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.1809369951534734, |
|
"grad_norm": 0.5108340978622437, |
|
"learning_rate": 9.249181896378607e-05, |
|
"loss": 0.0561, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.197092084006462, |
|
"grad_norm": 0.44887450337409973, |
|
"learning_rate": 9.235041674175868e-05, |
|
"loss": 0.0608, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.2132471728594507, |
|
"grad_norm": 0.462615042924881, |
|
"learning_rate": 9.22078055880791e-05, |
|
"loss": 0.0495, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.2294022617124396, |
|
"grad_norm": 0.48509976267814636, |
|
"learning_rate": 9.206398957371406e-05, |
|
"loss": 0.0589, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.245557350565428, |
|
"grad_norm": 0.48090824484825134, |
|
"learning_rate": 9.191897280402415e-05, |
|
"loss": 0.0521, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.261712439418417, |
|
"grad_norm": 0.5474804043769836, |
|
"learning_rate": 9.177275941864662e-05, |
|
"loss": 0.0591, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.2778675282714054, |
|
"grad_norm": 0.6736873984336853, |
|
"learning_rate": 9.162535359137725e-05, |
|
"loss": 0.0532, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.2940226171243943, |
|
"grad_norm": 0.4108855426311493, |
|
"learning_rate": 9.147675953005112e-05, |
|
"loss": 0.0608, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.3101777059773827, |
|
"grad_norm": 0.6929683685302734, |
|
"learning_rate": 9.132698147642258e-05, |
|
"loss": 0.0572, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.3263327948303716, |
|
"grad_norm": 0.662588357925415, |
|
"learning_rate": 9.117602370604412e-05, |
|
"loss": 0.0606, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.3424878836833605, |
|
"grad_norm": 0.6598329544067383, |
|
"learning_rate": 9.102389052814435e-05, |
|
"loss": 0.0617, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.358642972536349, |
|
"grad_norm": 0.6328267455101013, |
|
"learning_rate": 9.087058628550492e-05, |
|
"loss": 0.0635, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.374798061389338, |
|
"grad_norm": 0.7304327487945557, |
|
"learning_rate": 9.071611535433665e-05, |
|
"loss": 0.0636, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.3909531502423262, |
|
"grad_norm": 0.7994436621665955, |
|
"learning_rate": 9.056048214415456e-05, |
|
"loss": 0.0682, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.407108239095315, |
|
"grad_norm": 0.5563200116157532, |
|
"learning_rate": 9.040369109765196e-05, |
|
"loss": 0.0602, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.4232633279483036, |
|
"grad_norm": 0.862169623374939, |
|
"learning_rate": 9.024574669057368e-05, |
|
"loss": 0.0694, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.4394184168012925, |
|
"grad_norm": 0.5530250668525696, |
|
"learning_rate": 9.00866534315883e-05, |
|
"loss": 0.0621, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.4555735056542813, |
|
"grad_norm": 0.5109930634498596, |
|
"learning_rate": 8.992641586215944e-05, |
|
"loss": 0.0568, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.47172859450727, |
|
"grad_norm": 0.772769570350647, |
|
"learning_rate": 8.97650385564161e-05, |
|
"loss": 0.0634, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.4878836833602587, |
|
"grad_norm": 0.7233314514160156, |
|
"learning_rate": 8.960252612102209e-05, |
|
"loss": 0.0682, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.504038772213247, |
|
"grad_norm": 0.9270318746566772, |
|
"learning_rate": 8.943888319504457e-05, |
|
"loss": 0.0616, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.520193861066236, |
|
"grad_norm": 1.1452592611312866, |
|
"learning_rate": 8.927411444982157e-05, |
|
"loss": 0.0536, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.5363489499192244, |
|
"grad_norm": 0.8335738778114319, |
|
"learning_rate": 8.91082245888287e-05, |
|
"loss": 0.0588, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.5525040387722133, |
|
"grad_norm": 0.5370670557022095, |
|
"learning_rate": 8.894121834754481e-05, |
|
"loss": 0.0593, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.568659127625202, |
|
"grad_norm": 0.5532761216163635, |
|
"learning_rate": 8.877310049331691e-05, |
|
"loss": 0.0601, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.5848142164781907, |
|
"grad_norm": 0.6287941932678223, |
|
"learning_rate": 8.860387582522397e-05, |
|
"loss": 0.0627, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.600969305331179, |
|
"grad_norm": 0.6329537034034729, |
|
"learning_rate": 8.843354917394e-05, |
|
"loss": 0.0572, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.617124394184168, |
|
"grad_norm": 0.4902884364128113, |
|
"learning_rate": 8.826212540159615e-05, |
|
"loss": 0.0528, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.633279483037157, |
|
"grad_norm": 0.4139235019683838, |
|
"learning_rate": 8.808960940164188e-05, |
|
"loss": 0.0591, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.6494345718901453, |
|
"grad_norm": 0.481642484664917, |
|
"learning_rate": 8.79160060987053e-05, |
|
"loss": 0.063, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.665589660743134, |
|
"grad_norm": 0.575612485408783, |
|
"learning_rate": 8.77413204484526e-05, |
|
"loss": 0.0682, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.6817447495961226, |
|
"grad_norm": 0.7415863871574402, |
|
"learning_rate": 8.756555743744655e-05, |
|
"loss": 0.0488, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.6978998384491115, |
|
"grad_norm": 0.614101767539978, |
|
"learning_rate": 8.738872208300417e-05, |
|
"loss": 0.0627, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.7140549273021, |
|
"grad_norm": 0.5911862850189209, |
|
"learning_rate": 8.721081943305356e-05, |
|
"loss": 0.0622, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.730210016155089, |
|
"grad_norm": 0.5863639116287231, |
|
"learning_rate": 8.703185456598968e-05, |
|
"loss": 0.0598, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.7463651050080777, |
|
"grad_norm": 0.6773284077644348, |
|
"learning_rate": 8.685183259052952e-05, |
|
"loss": 0.0591, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.762520193861066, |
|
"grad_norm": 0.48102864623069763, |
|
"learning_rate": 8.667075864556615e-05, |
|
"loss": 0.0554, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.778675282714055, |
|
"grad_norm": 0.6997978687286377, |
|
"learning_rate": 8.648863790002213e-05, |
|
"loss": 0.0605, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.7948303715670435, |
|
"grad_norm": 0.6587175130844116, |
|
"learning_rate": 8.630547555270188e-05, |
|
"loss": 0.064, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.8109854604200324, |
|
"grad_norm": 0.8421849608421326, |
|
"learning_rate": 8.612127683214329e-05, |
|
"loss": 0.0523, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.827140549273021, |
|
"grad_norm": 0.3728615939617157, |
|
"learning_rate": 8.59360469964685e-05, |
|
"loss": 0.057, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.8432956381260097, |
|
"grad_norm": 0.6552137732505798, |
|
"learning_rate": 8.574979133323377e-05, |
|
"loss": 0.0605, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.8594507269789986, |
|
"grad_norm": 0.7351179718971252, |
|
"learning_rate": 8.556251515927855e-05, |
|
"loss": 0.0566, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.875605815831987, |
|
"grad_norm": 0.5557317733764648, |
|
"learning_rate": 8.537422382057374e-05, |
|
"loss": 0.0531, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.891760904684976, |
|
"grad_norm": 0.5497432351112366, |
|
"learning_rate": 8.518492269206899e-05, |
|
"loss": 0.0588, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.9079159935379644, |
|
"grad_norm": 0.6453426480293274, |
|
"learning_rate": 8.499461717753939e-05, |
|
"loss": 0.0589, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.9240710823909533, |
|
"grad_norm": 0.5362476706504822, |
|
"learning_rate": 8.480331270943111e-05, |
|
"loss": 0.0626, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.9402261712439417, |
|
"grad_norm": 0.42626962065696716, |
|
"learning_rate": 8.461101474870641e-05, |
|
"loss": 0.0495, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.9563812600969306, |
|
"grad_norm": 0.5444236397743225, |
|
"learning_rate": 8.44177287846877e-05, |
|
"loss": 0.0558, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.9725363489499195, |
|
"grad_norm": 0.5531013607978821, |
|
"learning_rate": 8.422346033490082e-05, |
|
"loss": 0.0497, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.988691437802908, |
|
"grad_norm": 0.5683811902999878, |
|
"learning_rate": 8.402821494491762e-05, |
|
"loss": 0.0528, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 3.004846526655897, |
|
"grad_norm": 0.5049775838851929, |
|
"learning_rate": 8.383199818819758e-05, |
|
"loss": 0.0616, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 3.0210016155088852, |
|
"grad_norm": 0.38788193464279175, |
|
"learning_rate": 8.363481566592874e-05, |
|
"loss": 0.0549, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 3.037156704361874, |
|
"grad_norm": 0.543121337890625, |
|
"learning_rate": 8.34366730068678e-05, |
|
"loss": 0.0561, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 3.0533117932148626, |
|
"grad_norm": 0.48212480545043945, |
|
"learning_rate": 8.323757586717947e-05, |
|
"loss": 0.0473, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 3.0694668820678515, |
|
"grad_norm": 0.7454380393028259, |
|
"learning_rate": 8.303752993027498e-05, |
|
"loss": 0.0564, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.08562197092084, |
|
"grad_norm": 0.5166053175926208, |
|
"learning_rate": 8.283654090664985e-05, |
|
"loss": 0.0571, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 3.101777059773829, |
|
"grad_norm": 0.5176417231559753, |
|
"learning_rate": 8.263461453372086e-05, |
|
"loss": 0.0593, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 3.1179321486268172, |
|
"grad_norm": 0.6009415984153748, |
|
"learning_rate": 8.243175657566233e-05, |
|
"loss": 0.0518, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 3.134087237479806, |
|
"grad_norm": 0.4920412302017212, |
|
"learning_rate": 8.222797282324152e-05, |
|
"loss": 0.0517, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 3.150242326332795, |
|
"grad_norm": 0.5730708241462708, |
|
"learning_rate": 8.20232690936533e-05, |
|
"loss": 0.055, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 3.1663974151857834, |
|
"grad_norm": 0.5689309239387512, |
|
"learning_rate": 8.18176512303542e-05, |
|
"loss": 0.0462, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 3.1825525040387723, |
|
"grad_norm": 0.3386596143245697, |
|
"learning_rate": 8.161112510289549e-05, |
|
"loss": 0.0593, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 3.1987075928917608, |
|
"grad_norm": 0.5641984939575195, |
|
"learning_rate": 8.140369660675571e-05, |
|
"loss": 0.0561, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 3.2148626817447497, |
|
"grad_norm": 0.3364955186843872, |
|
"learning_rate": 8.119537166317232e-05, |
|
"loss": 0.0488, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 3.231017770597738, |
|
"grad_norm": 0.5797820687294006, |
|
"learning_rate": 8.098615621897272e-05, |
|
"loss": 0.0471, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.247172859450727, |
|
"grad_norm": 0.6893600225448608, |
|
"learning_rate": 8.077605624640448e-05, |
|
"loss": 0.0489, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 3.263327948303716, |
|
"grad_norm": 0.6242002844810486, |
|
"learning_rate": 8.056507774296477e-05, |
|
"loss": 0.0502, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 3.2794830371567043, |
|
"grad_norm": 0.29608842730522156, |
|
"learning_rate": 8.035322673122934e-05, |
|
"loss": 0.0574, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 3.295638126009693, |
|
"grad_norm": 0.39050793647766113, |
|
"learning_rate": 8.014050925868042e-05, |
|
"loss": 0.0553, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 3.3117932148626816, |
|
"grad_norm": 0.7243764400482178, |
|
"learning_rate": 7.99269313975342e-05, |
|
"loss": 0.0496, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 3.3279483037156705, |
|
"grad_norm": 0.6739727258682251, |
|
"learning_rate": 7.971249924456742e-05, |
|
"loss": 0.0486, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 3.344103392568659, |
|
"grad_norm": 0.4816618263721466, |
|
"learning_rate": 7.94972189209434e-05, |
|
"loss": 0.0455, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 3.360258481421648, |
|
"grad_norm": 0.5240322351455688, |
|
"learning_rate": 7.928109657203725e-05, |
|
"loss": 0.0573, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 3.3764135702746367, |
|
"grad_norm": 0.3253321051597595, |
|
"learning_rate": 7.906413836726048e-05, |
|
"loss": 0.0467, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 3.392568659127625, |
|
"grad_norm": 0.5213293433189392, |
|
"learning_rate": 7.884635049988488e-05, |
|
"loss": 0.0488, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.408723747980614, |
|
"grad_norm": 0.4129197895526886, |
|
"learning_rate": 7.86277391868657e-05, |
|
"loss": 0.0483, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 3.4248788368336025, |
|
"grad_norm": 0.5131278038024902, |
|
"learning_rate": 7.840831066866423e-05, |
|
"loss": 0.0429, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 3.4410339256865914, |
|
"grad_norm": 0.529063880443573, |
|
"learning_rate": 7.818807120906964e-05, |
|
"loss": 0.0536, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 3.45718901453958, |
|
"grad_norm": 0.6816816926002502, |
|
"learning_rate": 7.796702709502012e-05, |
|
"loss": 0.0514, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 3.4733441033925687, |
|
"grad_norm": 0.3989129066467285, |
|
"learning_rate": 7.774518463642351e-05, |
|
"loss": 0.0613, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 3.489499192245557, |
|
"grad_norm": 0.4334792494773865, |
|
"learning_rate": 7.75225501659771e-05, |
|
"loss": 0.0483, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 3.505654281098546, |
|
"grad_norm": 0.46373841166496277, |
|
"learning_rate": 7.729913003898694e-05, |
|
"loss": 0.0443, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 3.5218093699515345, |
|
"grad_norm": 0.3799467980861664, |
|
"learning_rate": 7.707493063318629e-05, |
|
"loss": 0.0511, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 3.5379644588045234, |
|
"grad_norm": 0.4075853228569031, |
|
"learning_rate": 7.684995834855372e-05, |
|
"loss": 0.0478, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 3.5541195476575123, |
|
"grad_norm": 0.39337170124053955, |
|
"learning_rate": 7.662421960713028e-05, |
|
"loss": 0.0484, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.5702746365105007, |
|
"grad_norm": 0.30496665835380554, |
|
"learning_rate": 7.639772085283628e-05, |
|
"loss": 0.0446, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 3.5864297253634896, |
|
"grad_norm": 0.36177757382392883, |
|
"learning_rate": 7.617046855128724e-05, |
|
"loss": 0.0469, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 3.602584814216478, |
|
"grad_norm": 0.39714500308036804, |
|
"learning_rate": 7.594246918960946e-05, |
|
"loss": 0.0433, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 3.618739903069467, |
|
"grad_norm": 0.40002134442329407, |
|
"learning_rate": 7.571372927625469e-05, |
|
"loss": 0.0518, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 3.6348949919224554, |
|
"grad_norm": 0.6046271324157715, |
|
"learning_rate": 7.548425534081442e-05, |
|
"loss": 0.052, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 3.6510500807754442, |
|
"grad_norm": 0.43297943472862244, |
|
"learning_rate": 7.525405393383351e-05, |
|
"loss": 0.0462, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 3.667205169628433, |
|
"grad_norm": 0.4702610671520233, |
|
"learning_rate": 7.502313162662315e-05, |
|
"loss": 0.0543, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 3.6833602584814216, |
|
"grad_norm": 0.3743409216403961, |
|
"learning_rate": 7.479149501107328e-05, |
|
"loss": 0.0472, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 3.6995153473344105, |
|
"grad_norm": 0.3397691249847412, |
|
"learning_rate": 7.455915069946444e-05, |
|
"loss": 0.045, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 3.715670436187399, |
|
"grad_norm": 0.39391201734542847, |
|
"learning_rate": 7.4326105324279e-05, |
|
"loss": 0.0407, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.731825525040388, |
|
"grad_norm": 0.5775906443595886, |
|
"learning_rate": 7.409236553801183e-05, |
|
"loss": 0.0511, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 3.7479806138933762, |
|
"grad_norm": 0.5497547388076782, |
|
"learning_rate": 7.385793801298042e-05, |
|
"loss": 0.0426, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 3.764135702746365, |
|
"grad_norm": 0.4124547243118286, |
|
"learning_rate": 7.36228294411344e-05, |
|
"loss": 0.05, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 3.780290791599354, |
|
"grad_norm": 0.4284408390522003, |
|
"learning_rate": 7.338704653386448e-05, |
|
"loss": 0.0498, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 3.7964458804523424, |
|
"grad_norm": 0.47924646735191345, |
|
"learning_rate": 7.315059602181092e-05, |
|
"loss": 0.0491, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 3.8126009693053313, |
|
"grad_norm": 0.34164971113204956, |
|
"learning_rate": 7.291348465467136e-05, |
|
"loss": 0.0503, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 3.8287560581583198, |
|
"grad_norm": 0.4297367334365845, |
|
"learning_rate": 7.267571920100816e-05, |
|
"loss": 0.0505, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 3.8449111470113086, |
|
"grad_norm": 0.45141902565956116, |
|
"learning_rate": 7.24373064480552e-05, |
|
"loss": 0.0442, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 3.861066235864297, |
|
"grad_norm": 0.4785975217819214, |
|
"learning_rate": 7.219825320152411e-05, |
|
"loss": 0.0538, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 3.877221324717286, |
|
"grad_norm": 0.3574664890766144, |
|
"learning_rate": 7.195856628540995e-05, |
|
"loss": 0.0499, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.893376413570275, |
|
"grad_norm": 0.40025898814201355, |
|
"learning_rate": 7.171825254179654e-05, |
|
"loss": 0.0429, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 3.9095315024232633, |
|
"grad_norm": 0.3183038830757141, |
|
"learning_rate": 7.1477318830661e-05, |
|
"loss": 0.0466, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 3.9256865912762517, |
|
"grad_norm": 0.4639292061328888, |
|
"learning_rate": 7.123577202967805e-05, |
|
"loss": 0.0446, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 3.9418416801292406, |
|
"grad_norm": 0.5322105884552002, |
|
"learning_rate": 7.099361903402359e-05, |
|
"loss": 0.0495, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 3.9579967689822295, |
|
"grad_norm": 0.3138383626937866, |
|
"learning_rate": 7.075086675617788e-05, |
|
"loss": 0.0444, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 3.974151857835218, |
|
"grad_norm": 0.544747531414032, |
|
"learning_rate": 7.050752212572831e-05, |
|
"loss": 0.0541, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 3.990306946688207, |
|
"grad_norm": 0.4654453694820404, |
|
"learning_rate": 7.026359208917148e-05, |
|
"loss": 0.0504, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 4.006462035541196, |
|
"grad_norm": 0.31848329305648804, |
|
"learning_rate": 7.001908360971494e-05, |
|
"loss": 0.0451, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 4.022617124394184, |
|
"grad_norm": 0.43173283338546753, |
|
"learning_rate": 6.977400366707847e-05, |
|
"loss": 0.0467, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 4.038772213247173, |
|
"grad_norm": 0.5474691390991211, |
|
"learning_rate": 6.952835925729472e-05, |
|
"loss": 0.0479, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.054927302100162, |
|
"grad_norm": 0.4897683560848236, |
|
"learning_rate": 6.928215739250963e-05, |
|
"loss": 0.0505, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 4.07108239095315, |
|
"grad_norm": 0.31264185905456543, |
|
"learning_rate": 6.903540510078219e-05, |
|
"loss": 0.0457, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 4.087237479806139, |
|
"grad_norm": 0.4703519642353058, |
|
"learning_rate": 6.878810942588383e-05, |
|
"loss": 0.0451, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 4.103392568659127, |
|
"grad_norm": 0.3018874228000641, |
|
"learning_rate": 6.85402774270974e-05, |
|
"loss": 0.0449, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 4.119547657512117, |
|
"grad_norm": 0.3613886535167694, |
|
"learning_rate": 6.829191617901551e-05, |
|
"loss": 0.0481, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 4.135702746365105, |
|
"grad_norm": 0.34348440170288086, |
|
"learning_rate": 6.804303277133877e-05, |
|
"loss": 0.0396, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 4.1518578352180935, |
|
"grad_norm": 0.44307631254196167, |
|
"learning_rate": 6.779363430867326e-05, |
|
"loss": 0.0459, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 4.168012924071083, |
|
"grad_norm": 0.5705850124359131, |
|
"learning_rate": 6.754372791032783e-05, |
|
"loss": 0.0468, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 4.184168012924071, |
|
"grad_norm": 0.3443628251552582, |
|
"learning_rate": 6.729332071011077e-05, |
|
"loss": 0.0452, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 4.20032310177706, |
|
"grad_norm": 0.4537239372730255, |
|
"learning_rate": 6.704241985612625e-05, |
|
"loss": 0.0446, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.216478190630048, |
|
"grad_norm": 0.3705506920814514, |
|
"learning_rate": 6.679103251057024e-05, |
|
"loss": 0.0384, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 4.2326332794830375, |
|
"grad_norm": 0.5850950479507446, |
|
"learning_rate": 6.653916584952607e-05, |
|
"loss": 0.0483, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 4.248788368336026, |
|
"grad_norm": 0.7132898569107056, |
|
"learning_rate": 6.628682706275953e-05, |
|
"loss": 0.0432, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 4.264943457189014, |
|
"grad_norm": 0.3713912069797516, |
|
"learning_rate": 6.603402335351371e-05, |
|
"loss": 0.0382, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 4.281098546042003, |
|
"grad_norm": 0.6300288438796997, |
|
"learning_rate": 6.578076193830335e-05, |
|
"loss": 0.0444, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 4.297253634894992, |
|
"grad_norm": 0.5276614427566528, |
|
"learning_rate": 6.55270500467088e-05, |
|
"loss": 0.0554, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 4.313408723747981, |
|
"grad_norm": 0.38638073205947876, |
|
"learning_rate": 6.527289492116968e-05, |
|
"loss": 0.054, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 4.329563812600969, |
|
"grad_norm": 0.7961811423301697, |
|
"learning_rate": 6.501830381677813e-05, |
|
"loss": 0.0529, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 4.345718901453958, |
|
"grad_norm": 0.3550907373428345, |
|
"learning_rate": 6.476328400107171e-05, |
|
"loss": 0.0488, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 4.361873990306947, |
|
"grad_norm": 0.5453242659568787, |
|
"learning_rate": 6.450784275382595e-05, |
|
"loss": 0.0503, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.378029079159935, |
|
"grad_norm": 0.4048435688018799, |
|
"learning_rate": 6.425198736684655e-05, |
|
"loss": 0.0474, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 4.394184168012924, |
|
"grad_norm": 0.47286099195480347, |
|
"learning_rate": 6.399572514376113e-05, |
|
"loss": 0.0406, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 4.410339256865913, |
|
"grad_norm": 0.28871116042137146, |
|
"learning_rate": 6.373906339981092e-05, |
|
"loss": 0.0465, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 4.426494345718901, |
|
"grad_norm": 0.686854362487793, |
|
"learning_rate": 6.348200946164178e-05, |
|
"loss": 0.0477, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 4.44264943457189, |
|
"grad_norm": 0.7823249101638794, |
|
"learning_rate": 6.322457066709511e-05, |
|
"loss": 0.0407, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 4.458804523424879, |
|
"grad_norm": 0.4921092987060547, |
|
"learning_rate": 6.296675436499844e-05, |
|
"loss": 0.0408, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 4.474959612277868, |
|
"grad_norm": 0.5457318425178528, |
|
"learning_rate": 6.270856791495556e-05, |
|
"loss": 0.0421, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 4.491114701130856, |
|
"grad_norm": 0.7720049619674683, |
|
"learning_rate": 6.245001868713649e-05, |
|
"loss": 0.0495, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 4.5072697899838445, |
|
"grad_norm": 0.4767976999282837, |
|
"learning_rate": 6.219111406206707e-05, |
|
"loss": 0.0446, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 4.523424878836834, |
|
"grad_norm": 0.46596401929855347, |
|
"learning_rate": 6.193186143041828e-05, |
|
"loss": 0.044, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.539579967689822, |
|
"grad_norm": 0.4272357225418091, |
|
"learning_rate": 6.167226819279528e-05, |
|
"loss": 0.043, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 4.555735056542811, |
|
"grad_norm": 0.39680230617523193, |
|
"learning_rate": 6.141234175952612e-05, |
|
"loss": 0.0376, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 4.5718901453958, |
|
"grad_norm": 0.42455387115478516, |
|
"learning_rate": 6.115208955045025e-05, |
|
"loss": 0.0415, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 4.5880452342487885, |
|
"grad_norm": 0.4186107814311981, |
|
"learning_rate": 6.089151899470668e-05, |
|
"loss": 0.0394, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 4.604200323101777, |
|
"grad_norm": 0.4375015199184418, |
|
"learning_rate": 6.0630637530521905e-05, |
|
"loss": 0.0392, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 4.620355411954765, |
|
"grad_norm": 0.4540638327598572, |
|
"learning_rate": 6.036945260499762e-05, |
|
"loss": 0.0498, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 4.636510500807755, |
|
"grad_norm": 0.33841922879219055, |
|
"learning_rate": 6.010797167389808e-05, |
|
"loss": 0.0403, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 4.652665589660743, |
|
"grad_norm": 0.4046776592731476, |
|
"learning_rate": 5.9846202201437285e-05, |
|
"loss": 0.0394, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 4.668820678513732, |
|
"grad_norm": 0.5421432852745056, |
|
"learning_rate": 5.9584151660065946e-05, |
|
"loss": 0.0433, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 4.684975767366721, |
|
"grad_norm": 0.38528770208358765, |
|
"learning_rate": 5.93218275302581e-05, |
|
"loss": 0.0421, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 4.701130856219709, |
|
"grad_norm": 0.4037356376647949, |
|
"learning_rate": 5.9059237300297656e-05, |
|
"loss": 0.0467, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 4.717285945072698, |
|
"grad_norm": 0.3471173644065857, |
|
"learning_rate": 5.879638846606459e-05, |
|
"loss": 0.0395, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 4.733441033925686, |
|
"grad_norm": 0.37581634521484375, |
|
"learning_rate": 5.853328853082097e-05, |
|
"loss": 0.0454, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 4.749596122778676, |
|
"grad_norm": 0.3175153136253357, |
|
"learning_rate": 5.826994500499675e-05, |
|
"loss": 0.0438, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 4.765751211631664, |
|
"grad_norm": 0.6848868131637573, |
|
"learning_rate": 5.8006365405975436e-05, |
|
"loss": 0.0408, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 4.7819063004846525, |
|
"grad_norm": 0.5808501839637756, |
|
"learning_rate": 5.774255725787946e-05, |
|
"loss": 0.0469, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 4.798061389337642, |
|
"grad_norm": 0.4114396870136261, |
|
"learning_rate": 5.747852809135539e-05, |
|
"loss": 0.0475, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 4.81421647819063, |
|
"grad_norm": 0.4883790910243988, |
|
"learning_rate": 5.721428544335893e-05, |
|
"loss": 0.0427, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 4.830371567043619, |
|
"grad_norm": 0.476870059967041, |
|
"learning_rate": 5.694983685693988e-05, |
|
"loss": 0.0375, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 4.846526655896607, |
|
"grad_norm": 0.4612770974636078, |
|
"learning_rate": 5.668518988102668e-05, |
|
"loss": 0.0416, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.8626817447495965, |
|
"grad_norm": 0.6491737961769104, |
|
"learning_rate": 5.6420352070211016e-05, |
|
"loss": 0.0372, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 4.878836833602585, |
|
"grad_norm": 0.433662474155426, |
|
"learning_rate": 5.615533098453215e-05, |
|
"loss": 0.0467, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 4.894991922455573, |
|
"grad_norm": 0.2737475037574768, |
|
"learning_rate": 5.589013418926104e-05, |
|
"loss": 0.0413, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 4.911147011308563, |
|
"grad_norm": 0.388280987739563, |
|
"learning_rate": 5.562476925468445e-05, |
|
"loss": 0.0338, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 4.927302100161551, |
|
"grad_norm": 0.4380597174167633, |
|
"learning_rate": 5.535924375588887e-05, |
|
"loss": 0.0415, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 4.94345718901454, |
|
"grad_norm": 0.5273949503898621, |
|
"learning_rate": 5.509356527254421e-05, |
|
"loss": 0.0393, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 4.959612277867528, |
|
"grad_norm": 0.9131794571876526, |
|
"learning_rate": 5.482774138868749e-05, |
|
"loss": 0.0459, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 4.975767366720517, |
|
"grad_norm": 0.4145738482475281, |
|
"learning_rate": 5.456177969250632e-05, |
|
"loss": 0.038, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 4.991922455573506, |
|
"grad_norm": 0.4649810791015625, |
|
"learning_rate": 5.4295687776122236e-05, |
|
"loss": 0.0451, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 5.008077544426494, |
|
"grad_norm": 0.4478986859321594, |
|
"learning_rate": 5.4029473235374106e-05, |
|
"loss": 0.0439, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 5.024232633279483, |
|
"grad_norm": 0.34594130516052246, |
|
"learning_rate": 5.376314366960118e-05, |
|
"loss": 0.0451, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 5.040387722132472, |
|
"grad_norm": 0.5222188830375671, |
|
"learning_rate": 5.3496706681426204e-05, |
|
"loss": 0.0413, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 5.05654281098546, |
|
"grad_norm": 0.5172345638275146, |
|
"learning_rate": 5.323016987653842e-05, |
|
"loss": 0.0452, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 5.072697899838449, |
|
"grad_norm": 0.3387891352176666, |
|
"learning_rate": 5.29635408634764e-05, |
|
"loss": 0.042, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 5.088852988691438, |
|
"grad_norm": 0.45280104875564575, |
|
"learning_rate": 5.26968272534109e-05, |
|
"loss": 0.039, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 5.105008077544427, |
|
"grad_norm": 0.4317404329776764, |
|
"learning_rate": 5.2430036659927573e-05, |
|
"loss": 0.0377, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 5.121163166397415, |
|
"grad_norm": 0.6537325978279114, |
|
"learning_rate": 5.2163176698809645e-05, |
|
"loss": 0.044, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 5.1373182552504035, |
|
"grad_norm": 0.32357853651046753, |
|
"learning_rate": 5.189625498782047e-05, |
|
"loss": 0.0418, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 5.153473344103393, |
|
"grad_norm": 0.5868157148361206, |
|
"learning_rate": 5.1629279146486155e-05, |
|
"loss": 0.0452, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 5.169628432956381, |
|
"grad_norm": 0.46574723720550537, |
|
"learning_rate": 5.136225679587797e-05, |
|
"loss": 0.0432, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 5.18578352180937, |
|
"grad_norm": 0.2721109390258789, |
|
"learning_rate": 5.109519555839486e-05, |
|
"loss": 0.0424, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 5.201938610662358, |
|
"grad_norm": 0.3568851053714752, |
|
"learning_rate": 5.082810305754583e-05, |
|
"loss": 0.0391, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 5.2180936995153475, |
|
"grad_norm": 0.484744131565094, |
|
"learning_rate": 5.05609869177323e-05, |
|
"loss": 0.0371, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 5.234248788368336, |
|
"grad_norm": 0.29547053575515747, |
|
"learning_rate": 5.029385476403051e-05, |
|
"loss": 0.0311, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 5.250403877221324, |
|
"grad_norm": 0.3213876783847809, |
|
"learning_rate": 5.002671422197384e-05, |
|
"loss": 0.0334, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 5.266558966074314, |
|
"grad_norm": 0.3671923577785492, |
|
"learning_rate": 4.9759572917335104e-05, |
|
"loss": 0.0451, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 5.282714054927302, |
|
"grad_norm": 0.34725895524024963, |
|
"learning_rate": 4.949243847590887e-05, |
|
"loss": 0.0375, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 5.298869143780291, |
|
"grad_norm": 0.4185596704483032, |
|
"learning_rate": 4.922531852329384e-05, |
|
"loss": 0.0379, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 5.315024232633279, |
|
"grad_norm": 0.5074782371520996, |
|
"learning_rate": 4.895822068467505e-05, |
|
"loss": 0.0402, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 5.331179321486268, |
|
"grad_norm": 1.0807178020477295, |
|
"learning_rate": 4.869115258460635e-05, |
|
"loss": 0.0332, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 5.347334410339257, |
|
"grad_norm": 0.40008237957954407, |
|
"learning_rate": 4.8424121846792614e-05, |
|
"loss": 0.0422, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 5.363489499192245, |
|
"grad_norm": 0.3162868618965149, |
|
"learning_rate": 4.8157136093872215e-05, |
|
"loss": 0.0332, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 5.379644588045235, |
|
"grad_norm": 0.5000666379928589, |
|
"learning_rate": 4.789020294719933e-05, |
|
"loss": 0.0359, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 5.395799676898223, |
|
"grad_norm": 0.4171485900878906, |
|
"learning_rate": 4.762333002662655e-05, |
|
"loss": 0.0383, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 5.4119547657512115, |
|
"grad_norm": 0.6137621402740479, |
|
"learning_rate": 4.735652495028714e-05, |
|
"loss": 0.0393, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 5.4281098546042, |
|
"grad_norm": 0.6010169386863708, |
|
"learning_rate": 4.708979533437778e-05, |
|
"loss": 0.0401, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 5.444264943457189, |
|
"grad_norm": 0.32663294672966003, |
|
"learning_rate": 4.6823148792941e-05, |
|
"loss": 0.0422, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 5.460420032310178, |
|
"grad_norm": 0.3632521629333496, |
|
"learning_rate": 4.655659293764793e-05, |
|
"loss": 0.0426, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 5.476575121163166, |
|
"grad_norm": 0.30977901816368103, |
|
"learning_rate": 4.629013537758093e-05, |
|
"loss": 0.0417, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 5.4927302100161555, |
|
"grad_norm": 0.42319706082344055, |
|
"learning_rate": 4.6023783719016526e-05, |
|
"loss": 0.0431, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 5.508885298869144, |
|
"grad_norm": 0.3542233109474182, |
|
"learning_rate": 4.57575455652081e-05, |
|
"loss": 0.0365, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 5.525040387722132, |
|
"grad_norm": 0.4000030755996704, |
|
"learning_rate": 4.5491428516168975e-05, |
|
"loss": 0.0467, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 5.541195476575121, |
|
"grad_norm": 0.3602658212184906, |
|
"learning_rate": 4.52254401684554e-05, |
|
"loss": 0.0339, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 5.55735056542811, |
|
"grad_norm": 0.41686055064201355, |
|
"learning_rate": 4.495958811494978e-05, |
|
"loss": 0.0324, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 5.573505654281099, |
|
"grad_norm": 0.3794184625148773, |
|
"learning_rate": 4.469387994464381e-05, |
|
"loss": 0.0393, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 5.589660743134087, |
|
"grad_norm": 0.3583800196647644, |
|
"learning_rate": 4.442832324242197e-05, |
|
"loss": 0.0427, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 5.605815831987076, |
|
"grad_norm": 0.27712520956993103, |
|
"learning_rate": 4.416292558884489e-05, |
|
"loss": 0.0355, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 5.621970920840065, |
|
"grad_norm": 0.41386884450912476, |
|
"learning_rate": 4.389769455993303e-05, |
|
"loss": 0.0387, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 5.638126009693053, |
|
"grad_norm": 0.38608935475349426, |
|
"learning_rate": 4.3632637726950415e-05, |
|
"loss": 0.0353, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 5.654281098546042, |
|
"grad_norm": 0.5377467274665833, |
|
"learning_rate": 4.336776265618844e-05, |
|
"loss": 0.0388, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.670436187399031, |
|
"grad_norm": 0.5470876693725586, |
|
"learning_rate": 4.3103076908749996e-05, |
|
"loss": 0.0407, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 5.686591276252019, |
|
"grad_norm": 0.3438394367694855, |
|
"learning_rate": 4.283858804033351e-05, |
|
"loss": 0.0348, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 5.702746365105008, |
|
"grad_norm": 0.38908761739730835, |
|
"learning_rate": 4.257430360101734e-05, |
|
"loss": 0.0342, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 5.718901453957997, |
|
"grad_norm": 0.4778120219707489, |
|
"learning_rate": 4.2310231135044196e-05, |
|
"loss": 0.0421, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 5.735056542810986, |
|
"grad_norm": 0.4164102375507355, |
|
"learning_rate": 4.2046378180605894e-05, |
|
"loss": 0.0395, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 5.751211631663974, |
|
"grad_norm": 0.31713828444480896, |
|
"learning_rate": 4.1782752269627986e-05, |
|
"loss": 0.0378, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 5.7673667205169625, |
|
"grad_norm": 0.36085453629493713, |
|
"learning_rate": 4.1519360927554953e-05, |
|
"loss": 0.0419, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 5.783521809369952, |
|
"grad_norm": 0.3456893861293793, |
|
"learning_rate": 4.125621167313519e-05, |
|
"loss": 0.0408, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 5.79967689822294, |
|
"grad_norm": 0.4086418151855469, |
|
"learning_rate": 4.09933120182066e-05, |
|
"loss": 0.0361, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 5.815831987075929, |
|
"grad_norm": 0.3052937984466553, |
|
"learning_rate": 4.073066946748192e-05, |
|
"loss": 0.0372, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.831987075928918, |
|
"grad_norm": 0.3931577801704407, |
|
"learning_rate": 4.046829151833469e-05, |
|
"loss": 0.0331, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 5.8481421647819065, |
|
"grad_norm": 0.46110355854034424, |
|
"learning_rate": 4.020618566058513e-05, |
|
"loss": 0.0354, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 5.864297253634895, |
|
"grad_norm": 0.39353641867637634, |
|
"learning_rate": 3.994435937628636e-05, |
|
"loss": 0.035, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 5.880452342487883, |
|
"grad_norm": 0.4410620927810669, |
|
"learning_rate": 3.968282013951079e-05, |
|
"loss": 0.0374, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 5.896607431340873, |
|
"grad_norm": 0.36808839440345764, |
|
"learning_rate": 3.9421575416136866e-05, |
|
"loss": 0.0381, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 5.912762520193861, |
|
"grad_norm": 0.38404178619384766, |
|
"learning_rate": 3.9160632663635786e-05, |
|
"loss": 0.0348, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 5.92891760904685, |
|
"grad_norm": 0.2804437577724457, |
|
"learning_rate": 3.88999993308588e-05, |
|
"loss": 0.0346, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 5.945072697899839, |
|
"grad_norm": 0.4300249218940735, |
|
"learning_rate": 3.86396828578244e-05, |
|
"loss": 0.0378, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 5.961227786752827, |
|
"grad_norm": 0.3507043421268463, |
|
"learning_rate": 3.837969067550611e-05, |
|
"loss": 0.0353, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 5.977382875605816, |
|
"grad_norm": 0.3638635277748108, |
|
"learning_rate": 3.812003020562022e-05, |
|
"loss": 0.0314, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 5.993537964458804, |
|
"grad_norm": 0.3249291181564331, |
|
"learning_rate": 3.7860708860414005e-05, |
|
"loss": 0.0421, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 6.009693053311794, |
|
"grad_norm": 0.4243714511394501, |
|
"learning_rate": 3.760173404245409e-05, |
|
"loss": 0.034, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 6.025848142164782, |
|
"grad_norm": 0.2857236862182617, |
|
"learning_rate": 3.734311314441521e-05, |
|
"loss": 0.0373, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 6.0420032310177705, |
|
"grad_norm": 0.3825433850288391, |
|
"learning_rate": 3.708485354886906e-05, |
|
"loss": 0.0298, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 6.058158319870759, |
|
"grad_norm": 0.347135990858078, |
|
"learning_rate": 3.6826962628073705e-05, |
|
"loss": 0.0348, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 6.074313408723748, |
|
"grad_norm": 0.4767064154148102, |
|
"learning_rate": 3.6569447743762986e-05, |
|
"loss": 0.0341, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 6.090468497576737, |
|
"grad_norm": 0.2834322154521942, |
|
"learning_rate": 3.631231624693645e-05, |
|
"loss": 0.0391, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 6.106623586429725, |
|
"grad_norm": 0.635104775428772, |
|
"learning_rate": 3.605557547764951e-05, |
|
"loss": 0.0355, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 6.1227786752827145, |
|
"grad_norm": 0.35917991399765015, |
|
"learning_rate": 3.579923276480387e-05, |
|
"loss": 0.0303, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 6.138933764135703, |
|
"grad_norm": 0.40180811285972595, |
|
"learning_rate": 3.5543295425938414e-05, |
|
"loss": 0.036, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 6.155088852988691, |
|
"grad_norm": 0.24985694885253906, |
|
"learning_rate": 3.5287770767020164e-05, |
|
"loss": 0.0291, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 6.17124394184168, |
|
"grad_norm": 0.36490491032600403, |
|
"learning_rate": 3.5032666082235896e-05, |
|
"loss": 0.0397, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 6.187399030694669, |
|
"grad_norm": 0.2522122263908386, |
|
"learning_rate": 3.477798865378375e-05, |
|
"loss": 0.0335, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 6.203554119547658, |
|
"grad_norm": 0.2659394145011902, |
|
"learning_rate": 3.4523745751665534e-05, |
|
"loss": 0.0303, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 6.219709208400646, |
|
"grad_norm": 0.2996593713760376, |
|
"learning_rate": 3.426994463347902e-05, |
|
"loss": 0.0327, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 6.2358642972536344, |
|
"grad_norm": 0.429979145526886, |
|
"learning_rate": 3.401659254421094e-05, |
|
"loss": 0.0367, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 6.252019386106624, |
|
"grad_norm": 0.3394151031970978, |
|
"learning_rate": 3.3763696716029957e-05, |
|
"loss": 0.0316, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 6.268174474959612, |
|
"grad_norm": 0.5161323547363281, |
|
"learning_rate": 3.351126436808048e-05, |
|
"loss": 0.0365, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 6.284329563812601, |
|
"grad_norm": 0.3514617681503296, |
|
"learning_rate": 3.325930270627632e-05, |
|
"loss": 0.0316, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 6.30048465266559, |
|
"grad_norm": 0.4464913606643677, |
|
"learning_rate": 3.300781892309523e-05, |
|
"loss": 0.0351, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 6.316639741518578, |
|
"grad_norm": 0.4298667311668396, |
|
"learning_rate": 3.2756820197373394e-05, |
|
"loss": 0.0347, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 6.332794830371567, |
|
"grad_norm": 0.44100216031074524, |
|
"learning_rate": 3.250631369410064e-05, |
|
"loss": 0.0328, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 6.348949919224555, |
|
"grad_norm": 0.3341505527496338, |
|
"learning_rate": 3.2256306564215796e-05, |
|
"loss": 0.0345, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 6.365105008077545, |
|
"grad_norm": 0.41437703371047974, |
|
"learning_rate": 3.20068059444027e-05, |
|
"loss": 0.0334, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 6.381260096930533, |
|
"grad_norm": 0.5106116533279419, |
|
"learning_rate": 3.1757818956886295e-05, |
|
"loss": 0.0357, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 6.3974151857835215, |
|
"grad_norm": 0.2565278708934784, |
|
"learning_rate": 3.150935270922951e-05, |
|
"loss": 0.0286, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 6.413570274636511, |
|
"grad_norm": 0.3734126389026642, |
|
"learning_rate": 3.126141429413019e-05, |
|
"loss": 0.0297, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 6.429725363489499, |
|
"grad_norm": 0.34675681591033936, |
|
"learning_rate": 3.101401078921878e-05, |
|
"loss": 0.028, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 6.445880452342488, |
|
"grad_norm": 0.42493683099746704, |
|
"learning_rate": 3.076714925685617e-05, |
|
"loss": 0.03, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 6.462035541195476, |
|
"grad_norm": 0.30656370520591736, |
|
"learning_rate": 3.052083674393221e-05, |
|
"loss": 0.0312, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.4781906300484655, |
|
"grad_norm": 0.36631324887275696, |
|
"learning_rate": 3.0275080281664414e-05, |
|
"loss": 0.0279, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 6.494345718901454, |
|
"grad_norm": 0.5831628441810608, |
|
"learning_rate": 3.0029886885397367e-05, |
|
"loss": 0.0354, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 6.510500807754442, |
|
"grad_norm": 0.3962215781211853, |
|
"learning_rate": 2.9785263554402366e-05, |
|
"loss": 0.0392, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 6.526655896607432, |
|
"grad_norm": 0.45189252495765686, |
|
"learning_rate": 2.9541217271677745e-05, |
|
"loss": 0.0356, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 6.54281098546042, |
|
"grad_norm": 0.4892602860927582, |
|
"learning_rate": 2.9297755003749394e-05, |
|
"loss": 0.0297, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 6.558966074313409, |
|
"grad_norm": 0.32902640104293823, |
|
"learning_rate": 2.9054883700471974e-05, |
|
"loss": 0.0315, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 6.575121163166397, |
|
"grad_norm": 0.3130761682987213, |
|
"learning_rate": 2.8812610294830566e-05, |
|
"loss": 0.0336, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 6.591276252019386, |
|
"grad_norm": 0.5444455146789551, |
|
"learning_rate": 2.8570941702742663e-05, |
|
"loss": 0.0293, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 6.607431340872375, |
|
"grad_norm": 0.5223131775856018, |
|
"learning_rate": 2.832988482286081e-05, |
|
"loss": 0.032, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 6.623586429725363, |
|
"grad_norm": 0.5296066403388977, |
|
"learning_rate": 2.808944653637564e-05, |
|
"loss": 0.0336, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 6.639741518578353, |
|
"grad_norm": 0.4030674397945404, |
|
"learning_rate": 2.7849633706819533e-05, |
|
"loss": 0.0355, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 6.655896607431341, |
|
"grad_norm": 0.42938342690467834, |
|
"learning_rate": 2.7610453179870554e-05, |
|
"loss": 0.0291, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 6.6720516962843295, |
|
"grad_norm": 0.4580219089984894, |
|
"learning_rate": 2.7371911783157178e-05, |
|
"loss": 0.0318, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 6.688206785137318, |
|
"grad_norm": 0.30596330761909485, |
|
"learning_rate": 2.7134016326063234e-05, |
|
"loss": 0.034, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 6.704361873990307, |
|
"grad_norm": 0.35359278321266174, |
|
"learning_rate": 2.6896773599533694e-05, |
|
"loss": 0.0299, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 6.720516962843296, |
|
"grad_norm": 0.29407617449760437, |
|
"learning_rate": 2.6660190375880657e-05, |
|
"loss": 0.0266, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 6.736672051696284, |
|
"grad_norm": 0.357388973236084, |
|
"learning_rate": 2.6424273408590188e-05, |
|
"loss": 0.0352, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 6.7528271405492735, |
|
"grad_norm": 0.8390901684761047, |
|
"learning_rate": 2.6189029432129385e-05, |
|
"loss": 0.0377, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 6.768982229402262, |
|
"grad_norm": 0.28982290625572205, |
|
"learning_rate": 2.5954465161754227e-05, |
|
"loss": 0.0315, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 6.78513731825525, |
|
"grad_norm": 0.5228689908981323, |
|
"learning_rate": 2.5720587293317826e-05, |
|
"loss": 0.0283, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 6.801292407108239, |
|
"grad_norm": 0.5332914590835571, |
|
"learning_rate": 2.5487402503079395e-05, |
|
"loss": 0.0314, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 6.817447495961228, |
|
"grad_norm": 0.5198635458946228, |
|
"learning_rate": 2.5254917447513504e-05, |
|
"loss": 0.0298, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 6.833602584814217, |
|
"grad_norm": 0.37016230821609497, |
|
"learning_rate": 2.5023138763120217e-05, |
|
"loss": 0.0281, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 6.849757673667205, |
|
"grad_norm": 0.32923170924186707, |
|
"learning_rate": 2.479207306623554e-05, |
|
"loss": 0.0308, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 6.865912762520194, |
|
"grad_norm": 0.2647690176963806, |
|
"learning_rate": 2.456172695284263e-05, |
|
"loss": 0.0336, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 6.882067851373183, |
|
"grad_norm": 0.39588427543640137, |
|
"learning_rate": 2.433210699838342e-05, |
|
"loss": 0.0328, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 6.898222940226171, |
|
"grad_norm": 0.28190135955810547, |
|
"learning_rate": 2.4103219757571033e-05, |
|
"loss": 0.0292, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 6.91437802907916, |
|
"grad_norm": 0.4510742723941803, |
|
"learning_rate": 2.3875071764202563e-05, |
|
"loss": 0.0293, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 6.930533117932149, |
|
"grad_norm": 0.35639435052871704, |
|
"learning_rate": 2.36476695309726e-05, |
|
"loss": 0.0274, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 6.946688206785137, |
|
"grad_norm": 0.38058537244796753, |
|
"learning_rate": 2.342101954928733e-05, |
|
"loss": 0.0332, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 6.962843295638126, |
|
"grad_norm": 0.5739650726318359, |
|
"learning_rate": 2.3195128289079264e-05, |
|
"loss": 0.0266, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 6.978998384491114, |
|
"grad_norm": 0.5040541887283325, |
|
"learning_rate": 2.2970002198622444e-05, |
|
"loss": 0.0386, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 6.995153473344104, |
|
"grad_norm": 0.2796167731285095, |
|
"learning_rate": 2.2745647704348506e-05, |
|
"loss": 0.0304, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 7.011308562197092, |
|
"grad_norm": 0.8160725235939026, |
|
"learning_rate": 2.2522071210663108e-05, |
|
"loss": 0.0257, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 7.0274636510500805, |
|
"grad_norm": 0.2881336510181427, |
|
"learning_rate": 2.2299279099763176e-05, |
|
"loss": 0.0291, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 7.04361873990307, |
|
"grad_norm": 0.43697014451026917, |
|
"learning_rate": 2.2077277731454743e-05, |
|
"loss": 0.0302, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 7.059773828756058, |
|
"grad_norm": 0.2801852822303772, |
|
"learning_rate": 2.185607344297132e-05, |
|
"loss": 0.0285, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 7.075928917609047, |
|
"grad_norm": 0.4039601683616638, |
|
"learning_rate": 2.1635672548793067e-05, |
|
"loss": 0.0249, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 7.092084006462035, |
|
"grad_norm": 0.312288761138916, |
|
"learning_rate": 2.1416081340466477e-05, |
|
"loss": 0.0289, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 7.1082390953150245, |
|
"grad_norm": 0.3759534955024719, |
|
"learning_rate": 2.119730608642489e-05, |
|
"loss": 0.0343, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 7.124394184168013, |
|
"grad_norm": 0.5132532119750977, |
|
"learning_rate": 2.0979353031809383e-05, |
|
"loss": 0.0346, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 7.140549273021001, |
|
"grad_norm": 0.9728456139564514, |
|
"learning_rate": 2.0762228398290697e-05, |
|
"loss": 0.0339, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 7.156704361873991, |
|
"grad_norm": 0.32944944500923157, |
|
"learning_rate": 2.054593838389143e-05, |
|
"loss": 0.0257, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 7.172859450726979, |
|
"grad_norm": 0.22434404492378235, |
|
"learning_rate": 2.033048916280928e-05, |
|
"loss": 0.0317, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 7.189014539579968, |
|
"grad_norm": 0.36417004466056824, |
|
"learning_rate": 2.0115886885240682e-05, |
|
"loss": 0.0264, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 7.205169628432956, |
|
"grad_norm": 0.29687365889549255, |
|
"learning_rate": 1.990213767720533e-05, |
|
"loss": 0.03, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 7.221324717285945, |
|
"grad_norm": 0.41539931297302246, |
|
"learning_rate": 1.9689247640371223e-05, |
|
"loss": 0.0294, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 7.237479806138934, |
|
"grad_norm": 0.44902583956718445, |
|
"learning_rate": 1.9477222851880545e-05, |
|
"loss": 0.0282, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 7.253634894991922, |
|
"grad_norm": 0.38103097677230835, |
|
"learning_rate": 1.926606936417614e-05, |
|
"loss": 0.0311, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 7.269789983844911, |
|
"grad_norm": 0.42052754759788513, |
|
"learning_rate": 1.9055793204828842e-05, |
|
"loss": 0.0298, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 7.2859450726979, |
|
"grad_norm": 0.645574688911438, |
|
"learning_rate": 1.8846400376365253e-05, |
|
"loss": 0.0291, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 7.3021001615508885, |
|
"grad_norm": 0.4374733865261078, |
|
"learning_rate": 1.8637896856096548e-05, |
|
"loss": 0.0301, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 7.318255250403877, |
|
"grad_norm": 0.46677830815315247, |
|
"learning_rate": 1.843028859594772e-05, |
|
"loss": 0.0283, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 7.334410339256866, |
|
"grad_norm": 0.4820699393749237, |
|
"learning_rate": 1.8223581522287807e-05, |
|
"loss": 0.0264, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 7.350565428109855, |
|
"grad_norm": 0.2922935485839844, |
|
"learning_rate": 1.801778153576058e-05, |
|
"loss": 0.0256, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 7.366720516962843, |
|
"grad_norm": 0.6086759567260742, |
|
"learning_rate": 1.7812894511116235e-05, |
|
"loss": 0.0247, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 7.382875605815832, |
|
"grad_norm": 0.2402912974357605, |
|
"learning_rate": 1.7608926297043583e-05, |
|
"loss": 0.0309, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 7.399030694668821, |
|
"grad_norm": 0.25733429193496704, |
|
"learning_rate": 1.7405882716003154e-05, |
|
"loss": 0.0219, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 7.415185783521809, |
|
"grad_norm": 0.4837753474712372, |
|
"learning_rate": 1.7203769564060962e-05, |
|
"loss": 0.0262, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 7.431340872374798, |
|
"grad_norm": 0.31810057163238525, |
|
"learning_rate": 1.700259261072312e-05, |
|
"loss": 0.0234, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 7.447495961227787, |
|
"grad_norm": 0.5520622134208679, |
|
"learning_rate": 1.6802357598771012e-05, |
|
"loss": 0.0274, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 7.463651050080776, |
|
"grad_norm": 0.3220314085483551, |
|
"learning_rate": 1.6603070244097523e-05, |
|
"loss": 0.0277, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 7.479806138933764, |
|
"grad_norm": 0.3330337703227997, |
|
"learning_rate": 1.6404736235543705e-05, |
|
"loss": 0.032, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 7.4959612277867524, |
|
"grad_norm": 0.8944841027259827, |
|
"learning_rate": 1.6207361234736533e-05, |
|
"loss": 0.0257, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 7.512116316639742, |
|
"grad_norm": 0.3682458698749542, |
|
"learning_rate": 1.6010950875927182e-05, |
|
"loss": 0.0268, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 7.52827140549273, |
|
"grad_norm": 0.4134623408317566, |
|
"learning_rate": 1.581551076583023e-05, |
|
"loss": 0.0353, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 7.544426494345719, |
|
"grad_norm": 0.2340182512998581, |
|
"learning_rate": 1.5621046483463663e-05, |
|
"loss": 0.0252, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 7.560581583198708, |
|
"grad_norm": 0.6091485619544983, |
|
"learning_rate": 1.5427563579989507e-05, |
|
"loss": 0.0214, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 7.576736672051696, |
|
"grad_norm": 0.7271833419799805, |
|
"learning_rate": 1.523506757855545e-05, |
|
"loss": 0.0305, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 7.592891760904685, |
|
"grad_norm": 0.3721354007720947, |
|
"learning_rate": 1.504356397413713e-05, |
|
"loss": 0.032, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 7.609046849757673, |
|
"grad_norm": 0.3686143755912781, |
|
"learning_rate": 1.485305823338135e-05, |
|
"loss": 0.0258, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 7.625201938610663, |
|
"grad_norm": 0.3255109488964081, |
|
"learning_rate": 1.4663555794449918e-05, |
|
"loss": 0.0248, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 7.641357027463651, |
|
"grad_norm": 0.35630714893341064, |
|
"learning_rate": 1.4475062066864514e-05, |
|
"loss": 0.031, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 7.6575121163166395, |
|
"grad_norm": 0.2801692485809326, |
|
"learning_rate": 1.4287582431352175e-05, |
|
"loss": 0.0246, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 7.673667205169629, |
|
"grad_norm": 0.3327733874320984, |
|
"learning_rate": 1.41011222396918e-05, |
|
"loss": 0.0251, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 7.689822294022617, |
|
"grad_norm": 0.6043513417243958, |
|
"learning_rate": 1.3915686814561285e-05, |
|
"loss": 0.0288, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 7.705977382875606, |
|
"grad_norm": 0.3464643657207489, |
|
"learning_rate": 1.373128144938563e-05, |
|
"loss": 0.0299, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 7.722132471728594, |
|
"grad_norm": 0.21582302451133728, |
|
"learning_rate": 1.354791140818582e-05, |
|
"loss": 0.0337, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 7.7382875605815835, |
|
"grad_norm": 0.4652714431285858, |
|
"learning_rate": 1.3365581925428594e-05, |
|
"loss": 0.0241, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 7.754442649434572, |
|
"grad_norm": 0.2494271844625473, |
|
"learning_rate": 1.3184298205876938e-05, |
|
"loss": 0.0271, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 7.77059773828756, |
|
"grad_norm": 0.44884902238845825, |
|
"learning_rate": 1.3004065424441636e-05, |
|
"loss": 0.0258, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 7.78675282714055, |
|
"grad_norm": 0.3176079988479614, |
|
"learning_rate": 1.282488872603339e-05, |
|
"loss": 0.0226, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 7.802907915993538, |
|
"grad_norm": 0.42613035440444946, |
|
"learning_rate": 1.2646773225416132e-05, |
|
"loss": 0.0283, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 7.819063004846527, |
|
"grad_norm": 0.7398589849472046, |
|
"learning_rate": 1.2469724007060835e-05, |
|
"loss": 0.0377, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 7.835218093699515, |
|
"grad_norm": 0.38897576928138733, |
|
"learning_rate": 1.2293746125000538e-05, |
|
"loss": 0.0257, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 7.851373182552504, |
|
"grad_norm": 0.50649094581604, |
|
"learning_rate": 1.2118844602685958e-05, |
|
"loss": 0.0253, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 7.867528271405493, |
|
"grad_norm": 0.28981852531433105, |
|
"learning_rate": 1.1945024432842134e-05, |
|
"loss": 0.0285, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 7.883683360258481, |
|
"grad_norm": 0.406024307012558, |
|
"learning_rate": 1.1772290577325895e-05, |
|
"loss": 0.0306, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 7.899838449111471, |
|
"grad_norm": 0.2725732922554016, |
|
"learning_rate": 1.1600647966984274e-05, |
|
"loss": 0.0246, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 7.915993537964459, |
|
"grad_norm": 0.4214000999927521, |
|
"learning_rate": 1.1430101501513634e-05, |
|
"loss": 0.0281, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 7.9321486268174475, |
|
"grad_norm": 0.2999376952648163, |
|
"learning_rate": 1.1260656049319957e-05, |
|
"loss": 0.024, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 7.948303715670436, |
|
"grad_norm": 0.31904590129852295, |
|
"learning_rate": 1.1092316447379692e-05, |
|
"loss": 0.0212, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 7.964458804523425, |
|
"grad_norm": 0.3466980755329132, |
|
"learning_rate": 1.0925087501101872e-05, |
|
"loss": 0.0293, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 7.980613893376414, |
|
"grad_norm": 0.3411683440208435, |
|
"learning_rate": 1.0758973984190762e-05, |
|
"loss": 0.0219, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 7.996768982229402, |
|
"grad_norm": 0.3246071934700012, |
|
"learning_rate": 1.0593980638509693e-05, |
|
"loss": 0.0295, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 8.012924071082391, |
|
"grad_norm": 0.29273203015327454, |
|
"learning_rate": 1.043011217394571e-05, |
|
"loss": 0.0264, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 8.02907915993538, |
|
"grad_norm": 0.36481159925460815, |
|
"learning_rate": 1.0267373268275049e-05, |
|
"loss": 0.0303, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 8.045234248788368, |
|
"grad_norm": 0.26860660314559937, |
|
"learning_rate": 1.0105768567029655e-05, |
|
"loss": 0.0314, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 8.061389337641357, |
|
"grad_norm": 0.3127424716949463, |
|
"learning_rate": 9.945302683364566e-06, |
|
"loss": 0.0224, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 8.077544426494345, |
|
"grad_norm": 0.3091331422328949, |
|
"learning_rate": 9.785980197926242e-06, |
|
"loss": 0.0267, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 8.093699515347334, |
|
"grad_norm": 0.3343771696090698, |
|
"learning_rate": 9.627805658721756e-06, |
|
"loss": 0.0311, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 8.109854604200324, |
|
"grad_norm": 0.37236693501472473, |
|
"learning_rate": 9.470783580989029e-06, |
|
"loss": 0.0261, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 8.126009693053312, |
|
"grad_norm": 0.28066885471343994, |
|
"learning_rate": 9.314918447067878e-06, |
|
"loss": 0.0256, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 8.1421647819063, |
|
"grad_norm": 0.3097597360610962, |
|
"learning_rate": 9.16021470627213e-06, |
|
"loss": 0.0246, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 8.15831987075929, |
|
"grad_norm": 0.2532176971435547, |
|
"learning_rate": 9.006676774762535e-06, |
|
"loss": 0.0238, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 8.174474959612278, |
|
"grad_norm": 0.6101159453392029, |
|
"learning_rate": 8.854309035420772e-06, |
|
"loss": 0.0248, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 8.190630048465266, |
|
"grad_norm": 0.36472347378730774, |
|
"learning_rate": 8.703115837724274e-06, |
|
"loss": 0.0215, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 8.206785137318255, |
|
"grad_norm": 0.2975756525993347, |
|
"learning_rate": 8.553101497622162e-06, |
|
"loss": 0.0258, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 8.222940226171245, |
|
"grad_norm": 0.25580790638923645, |
|
"learning_rate": 8.404270297411904e-06, |
|
"loss": 0.025, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 8.239095315024233, |
|
"grad_norm": 0.3066563606262207, |
|
"learning_rate": 8.256626485617219e-06, |
|
"loss": 0.0303, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 8.255250403877222, |
|
"grad_norm": 0.44430306553840637, |
|
"learning_rate": 8.110174276866683e-06, |
|
"loss": 0.0224, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 8.27140549273021, |
|
"grad_norm": 0.3054925799369812, |
|
"learning_rate": 7.964917851773496e-06, |
|
"loss": 0.0278, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 8.287560581583199, |
|
"grad_norm": 0.25573277473449707, |
|
"learning_rate": 7.820861356816078e-06, |
|
"loss": 0.0211, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 8.303715670436187, |
|
"grad_norm": 0.3430786728858948, |
|
"learning_rate": 7.678008904219786e-06, |
|
"loss": 0.0237, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 8.319870759289175, |
|
"grad_norm": 0.4758915603160858, |
|
"learning_rate": 7.536364571839438e-06, |
|
"loss": 0.0215, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 8.336025848142166, |
|
"grad_norm": 0.3592261075973511, |
|
"learning_rate": 7.3959324030429654e-06, |
|
"loss": 0.0266, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 8.352180936995154, |
|
"grad_norm": 0.33260300755500793, |
|
"learning_rate": 7.256716406595948e-06, |
|
"loss": 0.0189, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 8.368336025848143, |
|
"grad_norm": 0.5978755950927734, |
|
"learning_rate": 7.118720556547259e-06, |
|
"loss": 0.0236, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 8.384491114701131, |
|
"grad_norm": 0.18789972364902496, |
|
"learning_rate": 6.9819487921155116e-06, |
|
"loss": 0.0234, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 8.40064620355412, |
|
"grad_norm": 0.31928345561027527, |
|
"learning_rate": 6.846405017576718e-06, |
|
"loss": 0.0281, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 8.416801292407108, |
|
"grad_norm": 0.34838926792144775, |
|
"learning_rate": 6.712093102152739e-06, |
|
"loss": 0.0276, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 8.432956381260096, |
|
"grad_norm": 0.37636154890060425, |
|
"learning_rate": 6.579016879900924e-06, |
|
"loss": 0.0251, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 8.449111470113085, |
|
"grad_norm": 0.26992267370224, |
|
"learning_rate": 6.447180149604603e-06, |
|
"loss": 0.0298, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 8.465266558966075, |
|
"grad_norm": 0.31432321667671204, |
|
"learning_rate": 6.316586674664654e-06, |
|
"loss": 0.0225, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 8.481421647819063, |
|
"grad_norm": 0.4834333062171936, |
|
"learning_rate": 6.187240182992126e-06, |
|
"loss": 0.0211, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 8.497576736672052, |
|
"grad_norm": 0.3510620594024658, |
|
"learning_rate": 6.059144366901736e-06, |
|
"loss": 0.0267, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 8.51373182552504, |
|
"grad_norm": 0.24435961246490479, |
|
"learning_rate": 5.932302883006546e-06, |
|
"loss": 0.0264, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 8.529886914378029, |
|
"grad_norm": 0.3356267809867859, |
|
"learning_rate": 5.806719352113521e-06, |
|
"loss": 0.0284, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 8.546042003231017, |
|
"grad_norm": 0.46301284432411194, |
|
"learning_rate": 5.682397359120245e-06, |
|
"loss": 0.0232, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 8.562197092084006, |
|
"grad_norm": 0.37262049317359924, |
|
"learning_rate": 5.5593404529124875e-06, |
|
"loss": 0.0226, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 8.578352180936996, |
|
"grad_norm": 0.5886579155921936, |
|
"learning_rate": 5.437552146263003e-06, |
|
"loss": 0.0276, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 8.594507269789984, |
|
"grad_norm": 0.3321017324924469, |
|
"learning_rate": 5.3170359157311445e-06, |
|
"loss": 0.0234, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 8.610662358642973, |
|
"grad_norm": 0.5492444038391113, |
|
"learning_rate": 5.197795201563743e-06, |
|
"loss": 0.0242, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 8.626817447495961, |
|
"grad_norm": 0.3654116690158844, |
|
"learning_rate": 5.07983340759679e-06, |
|
"loss": 0.0251, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 8.64297253634895, |
|
"grad_norm": 0.3987561762332916, |
|
"learning_rate": 4.963153901158352e-06, |
|
"loss": 0.0219, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 8.659127625201938, |
|
"grad_norm": 0.2271428108215332, |
|
"learning_rate": 4.847760012972402e-06, |
|
"loss": 0.0255, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 8.675282714054926, |
|
"grad_norm": 0.3363126218318939, |
|
"learning_rate": 4.733655037063761e-06, |
|
"loss": 0.026, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 8.691437802907917, |
|
"grad_norm": 0.4031514823436737, |
|
"learning_rate": 4.620842230664052e-06, |
|
"loss": 0.0263, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 8.707592891760905, |
|
"grad_norm": 0.4956108033657074, |
|
"learning_rate": 4.509324814118754e-06, |
|
"loss": 0.0219, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 8.723747980613894, |
|
"grad_norm": 0.3220359981060028, |
|
"learning_rate": 4.39910597079522e-06, |
|
"loss": 0.0241, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 8.739903069466882, |
|
"grad_norm": 0.2299569994211197, |
|
"learning_rate": 4.290188846991866e-06, |
|
"loss": 0.0255, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 8.75605815831987, |
|
"grad_norm": 0.2428327053785324, |
|
"learning_rate": 4.182576551848283e-06, |
|
"loss": 0.0212, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 8.772213247172859, |
|
"grad_norm": 0.42381399869918823, |
|
"learning_rate": 4.076272157256577e-06, |
|
"loss": 0.0218, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 8.788368336025847, |
|
"grad_norm": 0.4133065342903137, |
|
"learning_rate": 3.971278697773584e-06, |
|
"loss": 0.024, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 8.804523424878838, |
|
"grad_norm": 0.5403354167938232, |
|
"learning_rate": 3.86759917053432e-06, |
|
"loss": 0.0275, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 8.820678513731826, |
|
"grad_norm": 0.4583636224269867, |
|
"learning_rate": 3.765236535166361e-06, |
|
"loss": 0.0239, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 8.836833602584814, |
|
"grad_norm": 0.28426027297973633, |
|
"learning_rate": 3.6641937137054382e-06, |
|
"loss": 0.0212, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 8.852988691437803, |
|
"grad_norm": 0.3920319676399231, |
|
"learning_rate": 3.564473590511941e-06, |
|
"loss": 0.0168, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 8.869143780290791, |
|
"grad_norm": 0.28189295530319214, |
|
"learning_rate": 3.4660790121886387e-06, |
|
"loss": 0.0246, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 8.88529886914378, |
|
"grad_norm": 0.24748258292675018, |
|
"learning_rate": 3.369012787499387e-06, |
|
"loss": 0.0185, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 8.901453957996768, |
|
"grad_norm": 0.6315116286277771, |
|
"learning_rate": 3.273277687288978e-06, |
|
"loss": 0.025, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 8.917609046849758, |
|
"grad_norm": 0.34694400429725647, |
|
"learning_rate": 3.178876444404022e-06, |
|
"loss": 0.0188, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 8.933764135702747, |
|
"grad_norm": 0.38088199496269226, |
|
"learning_rate": 3.0858117536149365e-06, |
|
"loss": 0.0278, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 8.949919224555735, |
|
"grad_norm": 0.2522503435611725, |
|
"learning_rate": 2.9940862715390485e-06, |
|
"loss": 0.021, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 8.966074313408724, |
|
"grad_norm": 0.20651240646839142, |
|
"learning_rate": 2.9037026165647186e-06, |
|
"loss": 0.02, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 8.982229402261712, |
|
"grad_norm": 0.24644720554351807, |
|
"learning_rate": 2.8146633687766267e-06, |
|
"loss": 0.0196, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 8.9983844911147, |
|
"grad_norm": 0.26605224609375, |
|
"learning_rate": 2.7269710698821004e-06, |
|
"loss": 0.0205, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 9.014539579967689, |
|
"grad_norm": 0.3996153473854065, |
|
"learning_rate": 2.640628223138597e-06, |
|
"loss": 0.0206, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 9.03069466882068, |
|
"grad_norm": 0.2248448133468628, |
|
"learning_rate": 2.555637293282187e-06, |
|
"loss": 0.0236, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 9.046849757673668, |
|
"grad_norm": 0.32477447390556335, |
|
"learning_rate": 2.4720007064572504e-06, |
|
"loss": 0.0195, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 9.063004846526656, |
|
"grad_norm": 0.395511656999588, |
|
"learning_rate": 2.389720850147181e-06, |
|
"loss": 0.0286, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 9.079159935379645, |
|
"grad_norm": 0.3446315824985504, |
|
"learning_rate": 2.308800073106282e-06, |
|
"loss": 0.0249, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 9.095315024232633, |
|
"grad_norm": 0.2906045615673065, |
|
"learning_rate": 2.2292406852926383e-06, |
|
"loss": 0.0199, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 9.111470113085621, |
|
"grad_norm": 0.250783771276474, |
|
"learning_rate": 2.1510449578022674e-06, |
|
"loss": 0.0241, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 9.12762520193861, |
|
"grad_norm": 0.2330540418624878, |
|
"learning_rate": 2.074215122804235e-06, |
|
"loss": 0.021, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 9.1437802907916, |
|
"grad_norm": 0.4786008894443512, |
|
"learning_rate": 1.998753373476936e-06, |
|
"loss": 0.0214, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 9.159935379644589, |
|
"grad_norm": 0.30990907549858093, |
|
"learning_rate": 1.924661863945498e-06, |
|
"loss": 0.0229, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 9.176090468497577, |
|
"grad_norm": 0.3807198107242584, |
|
"learning_rate": 1.851942709220328e-06, |
|
"loss": 0.024, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 9.192245557350565, |
|
"grad_norm": 0.2552647590637207, |
|
"learning_rate": 1.7805979851366505e-06, |
|
"loss": 0.0262, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 9.208400646203554, |
|
"grad_norm": 0.2620588541030884, |
|
"learning_rate": 1.7106297282953376e-06, |
|
"loss": 0.021, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 9.224555735056542, |
|
"grad_norm": 0.3594263792037964, |
|
"learning_rate": 1.642039936004719e-06, |
|
"loss": 0.0248, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 9.24071082390953, |
|
"grad_norm": 0.2621122896671295, |
|
"learning_rate": 1.5748305662236007e-06, |
|
"loss": 0.0262, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 9.256865912762521, |
|
"grad_norm": 0.2171621173620224, |
|
"learning_rate": 1.5090035375053268e-06, |
|
"loss": 0.0274, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 9.27302100161551, |
|
"grad_norm": 0.2977316975593567, |
|
"learning_rate": 1.4445607289430784e-06, |
|
"loss": 0.0235, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 9.289176090468498, |
|
"grad_norm": 0.32373809814453125, |
|
"learning_rate": 1.3815039801161721e-06, |
|
"loss": 0.0229, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 9.305331179321486, |
|
"grad_norm": 0.5977046489715576, |
|
"learning_rate": 1.31983509103758e-06, |
|
"loss": 0.0222, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 9.321486268174475, |
|
"grad_norm": 0.3709489405155182, |
|
"learning_rate": 1.2595558221025372e-06, |
|
"loss": 0.0268, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 9.337641357027463, |
|
"grad_norm": 0.5009976029396057, |
|
"learning_rate": 1.2006678940383098e-06, |
|
"loss": 0.0223, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 9.353796445880452, |
|
"grad_norm": 0.5957422256469727, |
|
"learning_rate": 1.1431729878550235e-06, |
|
"loss": 0.0245, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 9.369951534733442, |
|
"grad_norm": 0.30659547448158264, |
|
"learning_rate": 1.0870727447977402e-06, |
|
"loss": 0.0191, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 9.38610662358643, |
|
"grad_norm": 0.41769152879714966, |
|
"learning_rate": 1.0323687662995685e-06, |
|
"loss": 0.0237, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 9.402261712439419, |
|
"grad_norm": 0.2959621846675873, |
|
"learning_rate": 9.79062613935955e-07, |
|
"loss": 0.0205, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 9.418416801292407, |
|
"grad_norm": 0.2948278486728668, |
|
"learning_rate": 9.271558093801202e-07, |
|
"loss": 0.0217, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 9.434571890145396, |
|
"grad_norm": 0.30144789814949036, |
|
"learning_rate": 8.766498343596052e-07, |
|
"loss": 0.0211, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 9.450726978998384, |
|
"grad_norm": 0.23101864755153656, |
|
"learning_rate": 8.275461306139876e-07, |
|
"loss": 0.0265, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 9.466882067851373, |
|
"grad_norm": 0.20476695895195007, |
|
"learning_rate": 7.79846099853715e-07, |
|
"loss": 0.0254, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 9.483037156704363, |
|
"grad_norm": 0.16762420535087585, |
|
"learning_rate": 7.335511037200982e-07, |
|
"loss": 0.0198, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 9.499192245557351, |
|
"grad_norm": 0.2673914134502411, |
|
"learning_rate": 6.886624637464422e-07, |
|
"loss": 0.0255, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 9.51534733441034, |
|
"grad_norm": 0.20123161375522614, |
|
"learning_rate": 6.451814613203211e-07, |
|
"loss": 0.0227, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 9.531502423263328, |
|
"grad_norm": 1.0308411121368408, |
|
"learning_rate": 6.031093376469899e-07, |
|
"loss": 0.0249, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 9.547657512116317, |
|
"grad_norm": 0.38134875893592834, |
|
"learning_rate": 5.624472937139802e-07, |
|
"loss": 0.0192, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 9.563812600969305, |
|
"grad_norm": 0.685291588306427, |
|
"learning_rate": 5.231964902567721e-07, |
|
"loss": 0.0261, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 9.579967689822293, |
|
"grad_norm": 0.33756789565086365, |
|
"learning_rate": 4.853580477257203e-07, |
|
"loss": 0.0211, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 9.596122778675284, |
|
"grad_norm": 0.21888208389282227, |
|
"learning_rate": 4.489330462540076e-07, |
|
"loss": 0.0228, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 9.612277867528272, |
|
"grad_norm": 0.2490539699792862, |
|
"learning_rate": 4.139225256268475e-07, |
|
"loss": 0.0216, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 9.62843295638126, |
|
"grad_norm": 0.4339372515678406, |
|
"learning_rate": 3.8032748525179685e-07, |
|
"loss": 0.017, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 9.644588045234249, |
|
"grad_norm": 0.29505810141563416, |
|
"learning_rate": 3.481488841302283e-07, |
|
"loss": 0.0231, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 9.660743134087237, |
|
"grad_norm": 0.6434254050254822, |
|
"learning_rate": 3.17387640829947e-07, |
|
"loss": 0.0197, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 9.676898222940226, |
|
"grad_norm": 0.42414963245391846, |
|
"learning_rate": 2.880446334589837e-07, |
|
"loss": 0.023, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 9.693053311793214, |
|
"grad_norm": 0.35125264525413513, |
|
"learning_rate": 2.601206996404981e-07, |
|
"loss": 0.0241, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 9.709208400646204, |
|
"grad_norm": 0.2720375657081604, |
|
"learning_rate": 2.336166364889092e-07, |
|
"loss": 0.0194, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 9.725363489499193, |
|
"grad_norm": 0.36113637685775757, |
|
"learning_rate": 2.0853320058710214e-07, |
|
"loss": 0.0235, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 9.741518578352181, |
|
"grad_norm": 0.43229228258132935, |
|
"learning_rate": 1.848711079648624e-07, |
|
"loss": 0.0202, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 9.75767366720517, |
|
"grad_norm": 0.3741385042667389, |
|
"learning_rate": 1.626310340784143e-07, |
|
"loss": 0.0235, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 9.773828756058158, |
|
"grad_norm": 0.6551222801208496, |
|
"learning_rate": 1.4181361379115855e-07, |
|
"loss": 0.021, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 9.789983844911147, |
|
"grad_norm": 0.22424794733524323, |
|
"learning_rate": 1.2241944135552574e-07, |
|
"loss": 0.0203, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 9.806138933764135, |
|
"grad_norm": 0.40934470295906067, |
|
"learning_rate": 1.044490703960288e-07, |
|
"loss": 0.0198, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 9.822294022617124, |
|
"grad_norm": 0.4585173428058624, |
|
"learning_rate": 8.79030138934589e-08, |
|
"loss": 0.0232, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 9.838449111470114, |
|
"grad_norm": 0.2896837592124939, |
|
"learning_rate": 7.278174417024164e-08, |
|
"loss": 0.0207, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 9.854604200323102, |
|
"grad_norm": 0.2858301103115082, |
|
"learning_rate": 5.908569287694787e-08, |
|
"loss": 0.0168, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 9.87075928917609, |
|
"grad_norm": 0.37577348947525024, |
|
"learning_rate": 4.6815250979970195e-08, |
|
"loss": 0.022, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 9.88691437802908, |
|
"grad_norm": 0.18759319186210632, |
|
"learning_rate": 3.5970768750387405e-08, |
|
"loss": 0.0218, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 9.903069466882068, |
|
"grad_norm": 0.32479435205459595, |
|
"learning_rate": 2.6552555753917017e-08, |
|
"loss": 0.0237, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 9.919224555735056, |
|
"grad_norm": 0.24623480439186096, |
|
"learning_rate": 1.8560880842133366e-08, |
|
"loss": 0.0184, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 9.935379644588044, |
|
"grad_norm": 0.22339710593223572, |
|
"learning_rate": 1.1995972144757116e-08, |
|
"loss": 0.0186, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 9.951534733441035, |
|
"grad_norm": 0.3188174366950989, |
|
"learning_rate": 6.858017063149369e-09, |
|
"loss": 0.0235, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 9.967689822294023, |
|
"grad_norm": 0.31466570496559143, |
|
"learning_rate": 3.1471622649714703e-09, |
|
"loss": 0.0245, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 9.983844911147012, |
|
"grad_norm": 0.3918437659740448, |
|
"learning_rate": 8.635136799939325e-10, |
|
"loss": 0.0212, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.3111129403114319, |
|
"learning_rate": 7.136497065518555e-12, |
|
"loss": 0.0254, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 6190, |
|
"total_flos": 0.0, |
|
"train_loss": 0.05139205720341841, |
|
"train_runtime": 6075.3943, |
|
"train_samples_per_second": 32.592, |
|
"train_steps_per_second": 1.019 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 6190, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 20000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|