{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.998542037907015,
  "eval_steps": 500,
  "global_step": 30860,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.10011339705167666,
      "grad_norm": 0.018291741609573364,
      "learning_rate": 9.90051847051199e-05,
      "loss": 2.1839,
      "step": 309
    },
    {
      "epoch": 0.2002267941033533,
      "grad_norm": 0.02208893373608589,
      "learning_rate": 9.800388852883993e-05,
      "loss": 2.1361,
      "step": 618
    },
    {
      "epoch": 0.30034019115502997,
      "grad_norm": 0.012775925919413567,
      "learning_rate": 9.700259235255995e-05,
      "loss": 2.1625,
      "step": 927
    },
    {
      "epoch": 0.4004535882067066,
      "grad_norm": 0.016916805878281593,
      "learning_rate": 9.600129617627997e-05,
      "loss": 2.1534,
      "step": 1236
    },
    {
      "epoch": 0.5005669852583833,
      "grad_norm": 0.006638580933213234,
      "learning_rate": 9.5e-05,
      "loss": 2.1464,
      "step": 1545
    },
    {
      "epoch": 0.6006803823100599,
      "grad_norm": 0.013361390680074692,
      "learning_rate": 9.399870382372004e-05,
      "loss": 2.1685,
      "step": 1854
    },
    {
      "epoch": 0.7007937793617366,
      "grad_norm": 0.005704471841454506,
      "learning_rate": 9.300064808814e-05,
      "loss": 2.1539,
      "step": 2163
    },
    {
      "epoch": 0.8009071764134132,
      "grad_norm": 0.02282548137009144,
      "learning_rate": 9.199935191186002e-05,
      "loss": 2.1799,
      "step": 2472
    },
    {
      "epoch": 0.9010205734650899,
      "grad_norm": 0.011914879083633423,
      "learning_rate": 9.099805573558005e-05,
      "loss": 2.1487,
      "step": 2781
    },
    {
      "epoch": 1.000971974728657,
      "grad_norm": 0.020065952092409134,
      "learning_rate": 8.999675955930007e-05,
      "loss": 2.1416,
      "step": 3090
    },
    {
      "epoch": 1.1010853717803337,
      "grad_norm": 0.020726900547742844,
      "learning_rate": 8.899546338302009e-05,
      "loss": 2.1422,
      "step": 3399
    },
    {
      "epoch": 1.2011987688320103,
      "grad_norm": 0.02327028475701809,
      "learning_rate": 8.799416720674012e-05,
      "loss": 2.149,
      "step": 3708
    },
    {
      "epoch": 1.301312165883687,
      "grad_norm": 0.03631984442472458,
      "learning_rate": 8.699611147116008e-05,
      "loss": 2.1605,
      "step": 4017
    },
    {
      "epoch": 1.4014255629353638,
      "grad_norm": 0.03152529150247574,
      "learning_rate": 8.59948152948801e-05,
      "loss": 2.1436,
      "step": 4326
    },
    {
      "epoch": 1.5015389599870403,
      "grad_norm": 0.05229083448648453,
      "learning_rate": 8.499351911860013e-05,
      "loss": 2.1794,
      "step": 4635
    },
    {
      "epoch": 1.601652357038717,
      "grad_norm": 0.027535825967788696,
      "learning_rate": 8.399222294232017e-05,
      "loss": 2.1641,
      "step": 4944
    },
    {
      "epoch": 1.7017657540903937,
      "grad_norm": 0.0046606422401964664,
      "learning_rate": 8.299092676604019e-05,
      "loss": 2.1242,
      "step": 5253
    },
    {
      "epoch": 1.8018791511420704,
      "grad_norm": 0.040044769644737244,
      "learning_rate": 8.198963058976021e-05,
      "loss": 2.14,
      "step": 5562
    },
    {
      "epoch": 1.901992548193747,
      "grad_norm": 0.01644105464220047,
      "learning_rate": 8.098833441348024e-05,
      "loss": 2.1488,
      "step": 5871
    },
    {
      "epoch": 2.001943949457314,
      "grad_norm": 0.01979956403374672,
      "learning_rate": 7.99902786779002e-05,
      "loss": 2.1586,
      "step": 6180
    },
    {
      "epoch": 2.1020573465089907,
      "grad_norm": 0.027056917548179626,
      "learning_rate": 7.898898250162022e-05,
      "loss": 2.1503,
      "step": 6489
    },
    {
      "epoch": 2.2021707435606674,
      "grad_norm": 0.01640058308839798,
      "learning_rate": 7.798768632534025e-05,
      "loss": 2.1418,
      "step": 6798
    },
    {
      "epoch": 2.302284140612344,
      "grad_norm": 0.01607314869761467,
      "learning_rate": 7.698639014906027e-05,
      "loss": 2.1514,
      "step": 7107
    },
    {
      "epoch": 2.4023975376640205,
      "grad_norm": 0.017274878919124603,
      "learning_rate": 7.59850939727803e-05,
      "loss": 2.1465,
      "step": 7416
    },
    {
      "epoch": 2.5025109347156973,
      "grad_norm": 0.024440627545118332,
      "learning_rate": 7.498379779650033e-05,
      "loss": 2.1211,
      "step": 7725
    },
    {
      "epoch": 2.602624331767374,
      "grad_norm": 0.005127054639160633,
      "learning_rate": 7.39857420609203e-05,
      "loss": 2.1423,
      "step": 8034
    },
    {
      "epoch": 2.702737728819051,
      "grad_norm": 0.05081469565629959,
      "learning_rate": 7.298444588464032e-05,
      "loss": 2.1705,
      "step": 8343
    },
    {
      "epoch": 2.8028511258707276,
      "grad_norm": 0.01722005568444729,
      "learning_rate": 7.198314970836034e-05,
      "loss": 2.1462,
      "step": 8652
    },
    {
      "epoch": 2.902964522922404,
      "grad_norm": 0.03728850930929184,
      "learning_rate": 7.098185353208037e-05,
      "loss": 2.1306,
      "step": 8961
    },
    {
      "epoch": 3.002915924185971,
      "grad_norm": 0.016364697366952896,
      "learning_rate": 6.998055735580039e-05,
      "loss": 2.1639,
      "step": 9270
    },
    {
      "epoch": 3.103029321237648,
      "grad_norm": 0.005412334576249123,
      "learning_rate": 6.897926117952041e-05,
      "loss": 2.1624,
      "step": 9579
    },
    {
      "epoch": 3.2031427182893246,
      "grad_norm": 0.0071863215416669846,
      "learning_rate": 6.797796500324045e-05,
      "loss": 2.1682,
      "step": 9888
    },
    {
      "epoch": 3.303256115341001,
      "grad_norm": 0.01707269623875618,
      "learning_rate": 6.69799092676604e-05,
      "loss": 2.129,
      "step": 10197
    },
    {
      "epoch": 3.4033695123926777,
      "grad_norm": 0.0162820965051651,
      "learning_rate": 6.597861309138042e-05,
      "loss": 2.1552,
      "step": 10506
    },
    {
      "epoch": 3.5034829094443545,
      "grad_norm": 0.005164678208529949,
      "learning_rate": 6.497731691510046e-05,
      "loss": 2.137,
      "step": 10815
    },
    {
      "epoch": 3.6035963064960312,
      "grad_norm": 0.01631810888648033,
      "learning_rate": 6.397602073882049e-05,
      "loss": 2.1578,
      "step": 11124
    },
    {
      "epoch": 3.703709703547708,
      "grad_norm": 0.004154821392148733,
      "learning_rate": 6.297472456254051e-05,
      "loss": 2.1602,
      "step": 11433
    },
    {
      "epoch": 3.8038231005993843,
      "grad_norm": 0.004850070457905531,
      "learning_rate": 6.197342838626053e-05,
      "loss": 2.1412,
      "step": 11742
    },
    {
      "epoch": 3.903936497651061,
      "grad_norm": 0.0028279961552470922,
      "learning_rate": 6.0975372650680494e-05,
      "loss": 2.1338,
      "step": 12051
    },
    {
      "epoch": 4.003887898914628,
      "grad_norm": 0.016007574275135994,
      "learning_rate": 5.997407647440052e-05,
      "loss": 2.1753,
      "step": 12360
    },
    {
      "epoch": 4.104001295966305,
      "grad_norm": 0.013178674504160881,
      "learning_rate": 5.897278029812054e-05,
      "loss": 2.1533,
      "step": 12669
    },
    {
      "epoch": 4.204114693017981,
      "grad_norm": 0.010946434922516346,
      "learning_rate": 5.797148412184057e-05,
      "loss": 2.1506,
      "step": 12978
    },
    {
      "epoch": 4.304228090069659,
      "grad_norm": 0.06032814085483551,
      "learning_rate": 5.6970187945560595e-05,
      "loss": 2.1408,
      "step": 13287
    },
    {
      "epoch": 4.404341487121335,
      "grad_norm": 0.036340948194265366,
      "learning_rate": 5.596889176928063e-05,
      "loss": 2.1317,
      "step": 13596
    },
    {
      "epoch": 4.505750850477888,
      "grad_norm": 0.022454094141721725,
      "learning_rate": 5.4967595593000656e-05,
      "loss": 2.1524,
      "step": 13905
    },
    {
      "epoch": 4.605864247529564,
      "grad_norm": 0.01212249230593443,
      "learning_rate": 5.396953985742061e-05,
      "loss": 2.1681,
      "step": 14214
    },
    {
      "epoch": 4.705977644581241,
      "grad_norm": 0.021747123450040817,
      "learning_rate": 5.296824368114064e-05,
      "loss": 2.1562,
      "step": 14523
    },
    {
      "epoch": 4.806091041632918,
      "grad_norm": 0.005955239292234182,
      "learning_rate": 5.196694750486067e-05,
      "loss": 2.1828,
      "step": 14832
    },
    {
      "epoch": 4.906204438684594,
      "grad_norm": 0.012121310457587242,
      "learning_rate": 5.096565132858069e-05,
      "loss": 2.1437,
      "step": 15141
    },
    {
      "epoch": 5.006479831524381,
      "grad_norm": 0.015455316752195358,
      "learning_rate": 4.996435515230072e-05,
      "loss": 2.1528,
      "step": 15450
    },
    {
      "epoch": 5.106593228576057,
      "grad_norm": 0.0007723022717982531,
      "learning_rate": 4.896305897602074e-05,
      "loss": 2.1723,
      "step": 15759
    },
    {
      "epoch": 5.206706625627733,
      "grad_norm": 0.004882230423390865,
      "learning_rate": 4.7965003240440704e-05,
      "loss": 2.1525,
      "step": 16068
    },
    {
      "epoch": 5.30682002267941,
      "grad_norm": 0.012511253356933594,
      "learning_rate": 4.696370706416073e-05,
      "loss": 2.1458,
      "step": 16377
    },
    {
      "epoch": 5.406933419731087,
      "grad_norm": 0.0262750256806612,
      "learning_rate": 4.596241088788075e-05,
      "loss": 2.0936,
      "step": 16686
    },
    {
      "epoch": 5.507046816782764,
      "grad_norm": 0.007834335789084435,
      "learning_rate": 4.4961114711600785e-05,
      "loss": 2.1322,
      "step": 16995
    },
    {
      "epoch": 5.60716021383444,
      "grad_norm": 0.020112166181206703,
      "learning_rate": 4.3959818535320806e-05,
      "loss": 2.1223,
      "step": 17304
    },
    {
      "epoch": 5.707273610886117,
      "grad_norm": 0.0009047465864568949,
      "learning_rate": 4.295852235904083e-05,
      "loss": 2.1425,
      "step": 17613
    },
    {
      "epoch": 5.807387007937794,
      "grad_norm": 0.006675088778138161,
      "learning_rate": 4.195722618276085e-05,
      "loss": 2.1433,
      "step": 17922
    },
    {
      "epoch": 5.90750040498947,
      "grad_norm": 0.022665197029709816,
      "learning_rate": 4.0959170447180816e-05,
      "loss": 2.1942,
      "step": 18231
    },
    {
      "epoch": 6.007451806253037,
      "grad_norm": 0.039628468453884125,
      "learning_rate": 3.995787427090085e-05,
      "loss": 2.1444,
      "step": 18540
    },
    {
      "epoch": 6.107565203304714,
      "grad_norm": 0.0011520631378516555,
      "learning_rate": 3.895657809462087e-05,
      "loss": 2.1524,
      "step": 18849
    },
    {
      "epoch": 6.207678600356391,
      "grad_norm": 0.03555034101009369,
      "learning_rate": 3.79552819183409e-05,
      "loss": 2.1203,
      "step": 19158
    },
    {
      "epoch": 6.307791997408067,
      "grad_norm": 0.013503223657608032,
      "learning_rate": 3.695398574206092e-05,
      "loss": 2.1352,
      "step": 19467
    },
    {
      "epoch": 6.407905394459744,
      "grad_norm": 0.026100030168890953,
      "learning_rate": 3.595268956578095e-05,
      "loss": 2.1655,
      "step": 19776
    },
    {
      "epoch": 6.508018791511421,
      "grad_norm": 0.0006254952168092132,
      "learning_rate": 3.4954633830200915e-05,
      "loss": 2.1352,
      "step": 20085
    },
    {
      "epoch": 6.608132188563097,
      "grad_norm": 0.01487251278012991,
      "learning_rate": 3.3953337653920935e-05,
      "loss": 2.1799,
      "step": 20394
    },
    {
      "epoch": 6.708245585614774,
      "grad_norm": 0.0053134192712605,
      "learning_rate": 3.295204147764096e-05,
      "loss": 2.1506,
      "step": 20703
    },
    {
      "epoch": 6.808358982666451,
      "grad_norm": 0.006517978850752115,
      "learning_rate": 3.195074530136098e-05,
      "loss": 2.1567,
      "step": 21012
    },
    {
      "epoch": 6.908472379718127,
      "grad_norm": 0.014856001362204552,
      "learning_rate": 3.0949449125081016e-05,
      "loss": 2.1593,
      "step": 21321
    },
    {
      "epoch": 7.008423780981694,
      "grad_norm": 0.0019718091934919357,
      "learning_rate": 2.994815294880104e-05,
      "loss": 2.1634,
      "step": 21630
    },
    {
      "epoch": 7.108537178033371,
      "grad_norm": 0.017417173832654953,
      "learning_rate": 2.8946856772521063e-05,
      "loss": 2.1433,
      "step": 21939
    },
    {
      "epoch": 7.208650575085048,
      "grad_norm": 0.034113720059394836,
      "learning_rate": 2.7948801036941023e-05,
      "loss": 2.1489,
      "step": 22248
    },
    {
      "epoch": 7.308763972136725,
      "grad_norm": 0.018758224323391914,
      "learning_rate": 2.694750486066105e-05,
      "loss": 2.13,
      "step": 22557
    },
    {
      "epoch": 7.408877369188401,
      "grad_norm": 0.04419185221195221,
      "learning_rate": 2.594620868438108e-05,
      "loss": 2.1641,
      "step": 22866
    },
    {
      "epoch": 7.508990766240077,
      "grad_norm": 0.001091700978577137,
      "learning_rate": 2.4944912508101104e-05,
      "loss": 2.1763,
      "step": 23175
    },
    {
      "epoch": 7.609104163291755,
      "grad_norm": 0.0011129506165161729,
      "learning_rate": 2.3943616331821128e-05,
      "loss": 2.125,
      "step": 23484
    },
    {
      "epoch": 7.709217560343431,
      "grad_norm": 0.007060033269226551,
      "learning_rate": 2.2942320155541155e-05,
      "loss": 2.1357,
      "step": 23793
    },
    {
      "epoch": 7.809330957395108,
      "grad_norm": 0.004454698413610458,
      "learning_rate": 2.1944264419961118e-05,
      "loss": 2.1502,
      "step": 24102
    },
    {
      "epoch": 7.9094443544467845,
      "grad_norm": 0.014023036696016788,
      "learning_rate": 2.0942968243681142e-05,
      "loss": 2.1397,
      "step": 24411
    },
    {
      "epoch": 8.00939575571035,
      "grad_norm": 0.0020535311195999384,
      "learning_rate": 1.994167206740117e-05,
      "loss": 2.1594,
      "step": 24720
    },
    {
      "epoch": 8.109509152762028,
      "grad_norm": 0.030191343277692795,
      "learning_rate": 1.8940375891121192e-05,
      "loss": 2.1379,
      "step": 25029
    },
    {
      "epoch": 8.209622549813705,
      "grad_norm": 0.02658534049987793,
      "learning_rate": 1.793907971484122e-05,
      "loss": 2.163,
      "step": 25338
    },
    {
      "epoch": 8.30973594686538,
      "grad_norm": 0.006291504483669996,
      "learning_rate": 1.6937783538561243e-05,
      "loss": 2.1445,
      "step": 25647
    },
    {
      "epoch": 8.409849343917058,
      "grad_norm": 0.013936794362962246,
      "learning_rate": 1.593648736228127e-05,
      "loss": 2.1571,
      "step": 25956
    },
    {
      "epoch": 8.509962740968735,
      "grad_norm": 0.031892433762550354,
      "learning_rate": 1.4938431626701233e-05,
      "loss": 2.1562,
      "step": 26265
    },
    {
      "epoch": 8.610076138020412,
      "grad_norm": 0.0028675836510956287,
      "learning_rate": 1.3937135450421257e-05,
      "loss": 2.1373,
      "step": 26574
    },
    {
      "epoch": 8.710189535072088,
      "grad_norm": 0.027382852509617805,
      "learning_rate": 1.2935839274141284e-05,
      "loss": 2.1162,
      "step": 26883
    },
    {
      "epoch": 8.810302932123765,
      "grad_norm": 0.006504488177597523,
      "learning_rate": 1.193454309786131e-05,
      "loss": 2.1281,
      "step": 27192
    },
    {
      "epoch": 8.910416329175442,
      "grad_norm": 0.006998216733336449,
      "learning_rate": 1.0933246921581337e-05,
      "loss": 2.1681,
      "step": 27501
    },
    {
      "epoch": 9.01036773043901,
      "grad_norm": 0.0019873257260769606,
      "learning_rate": 9.931950745301362e-06,
      "loss": 2.1397,
      "step": 27810
    },
    {
      "epoch": 9.110481127490685,
      "grad_norm": 0.04183882847428322,
      "learning_rate": 8.933895009721323e-06,
      "loss": 2.1425,
      "step": 28119
    },
    {
      "epoch": 9.210594524542362,
      "grad_norm": 0.008224571123719215,
      "learning_rate": 7.932598833441349e-06,
      "loss": 2.1567,
      "step": 28428
    },
    {
      "epoch": 9.31070792159404,
      "grad_norm": 0.009601627476513386,
      "learning_rate": 6.931302657161375e-06,
      "loss": 2.1334,
      "step": 28737
    },
    {
      "epoch": 9.410821318645715,
      "grad_norm": 0.028333676978945732,
      "learning_rate": 5.9300064808814e-06,
      "loss": 2.1732,
      "step": 29046
    },
    {
      "epoch": 9.510934715697392,
      "grad_norm": 0.01850961521267891,
      "learning_rate": 4.9287103046014265e-06,
      "loss": 2.1401,
      "step": 29355
    },
    {
      "epoch": 9.611048112749069,
      "grad_norm": 0.0022975043393671513,
      "learning_rate": 3.927414128321452e-06,
      "loss": 2.1523,
      "step": 29664
    },
    {
      "epoch": 9.711161509800744,
      "grad_norm": 0.033216096460819244,
      "learning_rate": 2.9261179520414777e-06,
      "loss": 2.1497,
      "step": 29973
    },
    {
      "epoch": 9.811274906852422,
      "grad_norm": 0.024143142625689507,
      "learning_rate": 1.928062216461439e-06,
      "loss": 2.1504,
      "step": 30282
    },
    {
      "epoch": 9.911388303904099,
      "grad_norm": 0.0085253044962883,
      "learning_rate": 9.267660401814646e-07,
      "loss": 2.177,
      "step": 30591
    },
    {
      "epoch": 9.998542037907015,
      "step": 30860,
      "total_flos": 1.461291118888168e+18,
      "train_loss": 1.202193304075487,
      "train_runtime": 14493.7066,
      "train_samples_per_second": 255.536,
      "train_steps_per_second": 2.129
    }
  ],
  "logging_steps": 309,
  "max_steps": 30860,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 6800,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.461291118888168e+18,
  "train_batch_size": 30,
  "trial_name": null,
  "trial_params": null
}