|
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 99.8648280558393,
  "eval_steps": 500,
  "global_step": 73400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9996595165134491,
      "grad_norm": 1.3896363973617554,
      "learning_rate": 9.90040871934605e-06,
      "loss": 2.3637,
      "step": 734
    },
    {
      "epoch": 1.9982975825672455,
      "grad_norm": 1.085627794265747,
      "learning_rate": 9.80040871934605e-06,
      "loss": 2.3048,
      "step": 1468
    },
    {
      "epoch": 2.996935648621042,
      "grad_norm": 1.7355504035949707,
      "learning_rate": 9.700544959128066e-06,
      "loss": 2.2626,
      "step": 2202
    },
    {
      "epoch": 3.9955737146748382,
      "grad_norm": 2.631619691848755,
      "learning_rate": 9.600544959128067e-06,
      "loss": 2.2366,
      "step": 2936
    },
    {
      "epoch": 4.994211780728635,
      "grad_norm": 3.0078556537628174,
      "learning_rate": 9.500544959128066e-06,
      "loss": 2.1902,
      "step": 3670
    },
    {
      "epoch": 5.992849846782431,
      "grad_norm": 3.808694362640381,
      "learning_rate": 9.400681198910083e-06,
      "loss": 2.1761,
      "step": 4404
    },
    {
      "epoch": 6.991487912836227,
      "grad_norm": 2.927497625350952,
      "learning_rate": 9.300681198910082e-06,
      "loss": 2.1423,
      "step": 5138
    },
    {
      "epoch": 7.990125978890024,
      "grad_norm": 3.1034231185913086,
      "learning_rate": 9.200681198910083e-06,
      "loss": 2.1311,
      "step": 5872
    },
    {
      "epoch": 8.98876404494382,
      "grad_norm": 2.8936243057250977,
      "learning_rate": 9.1008174386921e-06,
      "loss": 2.1006,
      "step": 6606
    },
    {
      "epoch": 9.987402110997616,
      "grad_norm": 3.1552979946136475,
      "learning_rate": 9.000817438692099e-06,
      "loss": 2.1108,
      "step": 7340
    },
    {
      "epoch": 10.986040177051413,
      "grad_norm": 4.212212562561035,
      "learning_rate": 8.900817438692098e-06,
      "loss": 2.0826,
      "step": 8074
    },
    {
      "epoch": 11.98467824310521,
      "grad_norm": 3.0641448497772217,
      "learning_rate": 8.800953678474115e-06,
      "loss": 2.0827,
      "step": 8808
    },
    {
      "epoch": 12.983316309159006,
      "grad_norm": 3.0309231281280518,
      "learning_rate": 8.700953678474116e-06,
      "loss": 2.0707,
      "step": 9542
    },
    {
      "epoch": 13.981954375212801,
      "grad_norm": 2.910205364227295,
      "learning_rate": 8.600953678474115e-06,
      "loss": 2.0545,
      "step": 10276
    },
    {
      "epoch": 14.980592441266598,
      "grad_norm": 2.4057796001434326,
      "learning_rate": 8.501089918256132e-06,
      "loss": 2.0518,
      "step": 11010
    },
    {
      "epoch": 15.979230507320395,
      "grad_norm": 3.38080096244812,
      "learning_rate": 8.401089918256131e-06,
      "loss": 2.0431,
      "step": 11744
    },
    {
      "epoch": 16.97786857337419,
      "grad_norm": 3.414673328399658,
      "learning_rate": 8.301089918256132e-06,
      "loss": 2.0278,
      "step": 12478
    },
    {
      "epoch": 17.97650663942799,
      "grad_norm": 3.2804346084594727,
      "learning_rate": 8.201089918256131e-06,
      "loss": 2.0394,
      "step": 13212
    },
    {
      "epoch": 18.975144705481785,
      "grad_norm": 3.313039541244507,
      "learning_rate": 8.101089918256132e-06,
      "loss": 2.0239,
      "step": 13946
    },
    {
      "epoch": 19.973782771535582,
      "grad_norm": 3.2474405765533447,
      "learning_rate": 8.001226158038149e-06,
      "loss": 2.0155,
      "step": 14680
    },
    {
      "epoch": 20.972420837589375,
      "grad_norm": 3.154122829437256,
      "learning_rate": 7.901226158038148e-06,
      "loss": 2.0183,
      "step": 15414
    },
    {
      "epoch": 21.971058903643172,
      "grad_norm": 4.2719407081604,
      "learning_rate": 7.801226158038147e-06,
      "loss": 2.0059,
      "step": 16148
    },
    {
      "epoch": 22.96969696969697,
      "grad_norm": 3.08585262298584,
      "learning_rate": 7.701226158038148e-06,
      "loss": 2.007,
      "step": 16882
    },
    {
      "epoch": 23.968335035750766,
      "grad_norm": 3.8600127696990967,
      "learning_rate": 7.601362397820165e-06,
      "loss": 1.9813,
      "step": 17616
    },
    {
      "epoch": 24.966973101804562,
      "grad_norm": 4.064390182495117,
      "learning_rate": 7.501362397820165e-06,
      "loss": 1.9909,
      "step": 18350
    },
    {
      "epoch": 25.96561116785836,
      "grad_norm": 5.1680169105529785,
      "learning_rate": 7.401362397820164e-06,
      "loss": 1.9781,
      "step": 19084
    },
    {
      "epoch": 26.964249233912156,
      "grad_norm": 3.716388702392578,
      "learning_rate": 7.30149863760218e-06,
      "loss": 1.9841,
      "step": 19818
    },
    {
      "epoch": 27.962887299965953,
      "grad_norm": 1.9636414051055908,
      "learning_rate": 7.20149863760218e-06,
      "loss": 1.9727,
      "step": 20552
    },
    {
      "epoch": 28.96152536601975,
      "grad_norm": 2.968393087387085,
      "learning_rate": 7.10149863760218e-06,
      "loss": 1.9729,
      "step": 21286
    },
    {
      "epoch": 29.960163432073543,
      "grad_norm": 1.9185447692871094,
      "learning_rate": 7.001498637602181e-06,
      "loss": 1.9586,
      "step": 22020
    },
    {
      "epoch": 30.95880149812734,
      "grad_norm": 3.3125815391540527,
      "learning_rate": 6.901634877384197e-06,
      "loss": 1.9661,
      "step": 22754
    },
    {
      "epoch": 31.957439564181136,
      "grad_norm": 3.3222153186798096,
      "learning_rate": 6.801634877384197e-06,
      "loss": 1.9532,
      "step": 23488
    },
    {
      "epoch": 32.95607763023494,
      "grad_norm": 2.3255455493927,
      "learning_rate": 6.701634877384197e-06,
      "loss": 1.9445,
      "step": 24222
    },
    {
      "epoch": 33.95471569628873,
      "grad_norm": 3.436879873275757,
      "learning_rate": 6.6017711171662135e-06,
      "loss": 1.9564,
      "step": 24956
    },
    {
      "epoch": 34.95335376234252,
      "grad_norm": 2.789512872695923,
      "learning_rate": 6.501771117166214e-06,
      "loss": 1.9458,
      "step": 25690
    },
    {
      "epoch": 35.951991828396324,
      "grad_norm": 2.496615409851074,
      "learning_rate": 6.401771117166214e-06,
      "loss": 1.9314,
      "step": 26424
    },
    {
      "epoch": 36.95062989445012,
      "grad_norm": 3.680979013442993,
      "learning_rate": 6.301907356948229e-06,
      "loss": 1.9481,
      "step": 27158
    },
    {
      "epoch": 37.94926796050392,
      "grad_norm": 3.8750393390655518,
      "learning_rate": 6.201907356948229e-06,
      "loss": 1.9346,
      "step": 27892
    },
    {
      "epoch": 38.94790602655771,
      "grad_norm": 3.358818531036377,
      "learning_rate": 6.101907356948229e-06,
      "loss": 1.9426,
      "step": 28626
    },
    {
      "epoch": 39.94654409261151,
      "grad_norm": 3.4985063076019287,
      "learning_rate": 6.001907356948229e-06,
      "loss": 1.9319,
      "step": 29360
    },
    {
      "epoch": 40.945182158665304,
      "grad_norm": 3.5801708698272705,
      "learning_rate": 5.9020435967302455e-06,
      "loss": 1.9316,
      "step": 30094
    },
    {
      "epoch": 41.943820224719104,
      "grad_norm": 2.131335973739624,
      "learning_rate": 5.802043596730246e-06,
      "loss": 1.9267,
      "step": 30828
    },
    {
      "epoch": 42.9424582907729,
      "grad_norm": 3.048663854598999,
      "learning_rate": 5.702043596730246e-06,
      "loss": 1.9313,
      "step": 31562
    },
    {
      "epoch": 43.94109635682669,
      "grad_norm": 4.18536376953125,
      "learning_rate": 5.602043596730246e-06,
      "loss": 1.9307,
      "step": 32296
    },
    {
      "epoch": 44.93973442288049,
      "grad_norm": 3.1449191570281982,
      "learning_rate": 5.502043596730245e-06,
      "loss": 1.9121,
      "step": 33030
    },
    {
      "epoch": 45.938372488934284,
      "grad_norm": 2.445446491241455,
      "learning_rate": 5.4021798365122625e-06,
      "loss": 1.9147,
      "step": 33764
    },
    {
      "epoch": 46.937010554988085,
      "grad_norm": 3.044128656387329,
      "learning_rate": 5.302316076294278e-06,
      "loss": 1.9179,
      "step": 34498
    },
    {
      "epoch": 47.93564862104188,
      "grad_norm": 2.411952018737793,
      "learning_rate": 5.202316076294278e-06,
      "loss": 1.9175,
      "step": 35232
    },
    {
      "epoch": 48.93428668709568,
      "grad_norm": 4.5417799949646,
      "learning_rate": 5.1023160762942784e-06,
      "loss": 1.9139,
      "step": 35966
    },
    {
      "epoch": 49.93292475314947,
      "grad_norm": 3.36789608001709,
      "learning_rate": 5.002452316076295e-06,
      "loss": 1.8999,
      "step": 36700
    },
    {
      "epoch": 50.93156281920327,
      "grad_norm": 1.4864888191223145,
      "learning_rate": 4.902452316076295e-06,
      "loss": 1.9151,
      "step": 37434
    },
    {
      "epoch": 51.930200885257065,
      "grad_norm": 2.543835163116455,
      "learning_rate": 4.802452316076295e-06,
      "loss": 1.9049,
      "step": 38168
    },
    {
      "epoch": 52.92883895131086,
      "grad_norm": 2.121729850769043,
      "learning_rate": 4.7024523160762945e-06,
      "loss": 1.9099,
      "step": 38902
    },
    {
      "epoch": 53.92747701736466,
      "grad_norm": 3.2268223762512207,
      "learning_rate": 4.602452316076295e-06,
      "loss": 1.9071,
      "step": 39636
    },
    {
      "epoch": 54.92611508341845,
      "grad_norm": 3.928720235824585,
      "learning_rate": 4.502452316076295e-06,
      "loss": 1.8883,
      "step": 40370
    },
    {
      "epoch": 55.92475314947225,
      "grad_norm": 2.6059372425079346,
      "learning_rate": 4.402588555858311e-06,
      "loss": 1.9033,
      "step": 41104
    },
    {
      "epoch": 56.923391215526046,
      "grad_norm": 2.899505615234375,
      "learning_rate": 4.3025885558583105e-06,
      "loss": 1.8852,
      "step": 41838
    },
    {
      "epoch": 57.922029281579846,
      "grad_norm": 2.1250553131103516,
      "learning_rate": 4.202588555858311e-06,
      "loss": 1.8919,
      "step": 42572
    },
    {
      "epoch": 58.92066734763364,
      "grad_norm": 3.6068265438079834,
      "learning_rate": 4.102588555858311e-06,
      "loss": 1.8801,
      "step": 43306
    },
    {
      "epoch": 59.91930541368744,
      "grad_norm": 3.1520488262176514,
      "learning_rate": 4.002588555858311e-06,
      "loss": 1.8997,
      "step": 44040
    },
    {
      "epoch": 60.91794347974123,
      "grad_norm": 2.7997710704803467,
      "learning_rate": 3.902588555858311e-06,
      "loss": 1.8942,
      "step": 44774
    },
    {
      "epoch": 61.916581545795026,
      "grad_norm": 2.2547781467437744,
      "learning_rate": 3.8027247956403276e-06,
      "loss": 1.8996,
      "step": 45508
    },
    {
      "epoch": 62.915219611848826,
      "grad_norm": 2.583367347717285,
      "learning_rate": 3.7027247956403272e-06,
      "loss": 1.8811,
      "step": 46242
    },
    {
      "epoch": 63.91385767790262,
      "grad_norm": 3.9086215496063232,
      "learning_rate": 3.6027247956403274e-06,
      "loss": 1.8878,
      "step": 46976
    },
    {
      "epoch": 64.91249574395641,
      "grad_norm": 1.8989681005477905,
      "learning_rate": 3.50299727520436e-06,
      "loss": 1.8987,
      "step": 47710
    },
    {
      "epoch": 65.91113381001021,
      "grad_norm": 2.5425479412078857,
      "learning_rate": 3.40299727520436e-06,
      "loss": 1.8928,
      "step": 48444
    },
    {
      "epoch": 66.90977187606401,
      "grad_norm": 2.8161380290985107,
      "learning_rate": 3.3029972752043598e-06,
      "loss": 1.8833,
      "step": 49178
    },
    {
      "epoch": 67.90840994211781,
      "grad_norm": 3.328853130340576,
      "learning_rate": 3.20299727520436e-06,
      "loss": 1.8802,
      "step": 49912
    },
    {
      "epoch": 68.9070480081716,
      "grad_norm": 1.836472749710083,
      "learning_rate": 3.1029972752043596e-06,
      "loss": 1.8821,
      "step": 50646
    },
    {
      "epoch": 69.9056860742254,
      "grad_norm": 3.308563470840454,
      "learning_rate": 3.00299727520436e-06,
      "loss": 1.8824,
      "step": 51380
    },
    {
      "epoch": 70.9043241402792,
      "grad_norm": 2.7526936531066895,
      "learning_rate": 2.9031335149863767e-06,
      "loss": 1.8828,
      "step": 52114
    },
    {
      "epoch": 71.90296220633299,
      "grad_norm": 2.601332664489746,
      "learning_rate": 2.8031335149863763e-06,
      "loss": 1.8872,
      "step": 52848
    },
    {
      "epoch": 72.90160027238679,
      "grad_norm": 3.038666248321533,
      "learning_rate": 2.7031335149863765e-06,
      "loss": 1.8861,
      "step": 53582
    },
    {
      "epoch": 73.90023833844059,
      "grad_norm": 2.3815577030181885,
      "learning_rate": 2.6032697547683926e-06,
      "loss": 1.8768,
      "step": 54316
    },
    {
      "epoch": 74.89887640449439,
      "grad_norm": 3.253783941268921,
      "learning_rate": 2.5032697547683927e-06,
      "loss": 1.8767,
      "step": 55050
    },
    {
      "epoch": 75.89751447054817,
      "grad_norm": 1.8898478746414185,
      "learning_rate": 2.403405994550409e-06,
      "loss": 1.8778,
      "step": 55784
    },
    {
      "epoch": 76.89615253660197,
      "grad_norm": 2.8832762241363525,
      "learning_rate": 2.303405994550409e-06,
      "loss": 1.8816,
      "step": 56518
    },
    {
      "epoch": 77.89479060265577,
      "grad_norm": 2.767455577850342,
      "learning_rate": 2.2034059945504087e-06,
      "loss": 1.8817,
      "step": 57252
    },
    {
      "epoch": 78.89342866870956,
      "grad_norm": 1.9658331871032715,
      "learning_rate": 2.1035422343324252e-06,
      "loss": 1.8857,
      "step": 57986
    },
    {
      "epoch": 79.89206673476336,
      "grad_norm": 2.7493135929107666,
      "learning_rate": 2.003542234332425e-06,
      "loss": 1.8763,
      "step": 58720
    },
    {
      "epoch": 80.89070480081716,
      "grad_norm": 2.519388198852539,
      "learning_rate": 1.9035422343324252e-06,
      "loss": 1.8782,
      "step": 59454
    },
    {
      "epoch": 81.88934286687096,
      "grad_norm": 2.258284091949463,
      "learning_rate": 1.8036784741144416e-06,
      "loss": 1.8709,
      "step": 60188
    },
    {
      "epoch": 82.88798093292475,
      "grad_norm": 2.6191112995147705,
      "learning_rate": 1.7038147138964577e-06,
      "loss": 1.8486,
      "step": 60922
    },
    {
      "epoch": 83.88661899897855,
      "grad_norm": 3.241089105606079,
      "learning_rate": 1.603814713896458e-06,
      "loss": 1.8726,
      "step": 61656
    },
    {
      "epoch": 84.88525706503235,
      "grad_norm": 1.7192323207855225,
      "learning_rate": 1.503814713896458e-06,
      "loss": 1.8793,
      "step": 62390
    },
    {
      "epoch": 85.88389513108615,
      "grad_norm": 2.782193899154663,
      "learning_rate": 1.4038147138964579e-06,
      "loss": 1.8714,
      "step": 63124
    },
    {
      "epoch": 86.88253319713994,
      "grad_norm": 2.659689426422119,
      "learning_rate": 1.303814713896458e-06,
      "loss": 1.8908,
      "step": 63858
    },
    {
      "epoch": 87.88117126319374,
      "grad_norm": 1.9582741260528564,
      "learning_rate": 1.2038147138964579e-06,
      "loss": 1.8595,
      "step": 64592
    },
    {
      "epoch": 88.87980932924754,
      "grad_norm": 1.9837833642959595,
      "learning_rate": 1.1039509536784742e-06,
      "loss": 1.875,
      "step": 65326
    },
    {
      "epoch": 89.87844739530132,
      "grad_norm": 2.076040506362915,
      "learning_rate": 1.0039509536784741e-06,
      "loss": 1.8664,
      "step": 66060
    },
    {
      "epoch": 90.87708546135512,
      "grad_norm": 1.5430365800857544,
      "learning_rate": 9.039509536784742e-07,
      "loss": 1.8875,
      "step": 66794
    },
    {
      "epoch": 91.87572352740892,
      "grad_norm": 1.8234200477600098,
      "learning_rate": 8.040871934604905e-07,
      "loss": 1.868,
      "step": 67528
    },
    {
      "epoch": 92.87436159346272,
      "grad_norm": 2.1992905139923096,
      "learning_rate": 7.040871934604905e-07,
      "loss": 1.8844,
      "step": 68262
    },
    {
      "epoch": 93.87299965951651,
      "grad_norm": 2.1095755100250244,
      "learning_rate": 6.042234332425069e-07,
      "loss": 1.8808,
      "step": 68996
    },
    {
      "epoch": 94.87163772557031,
      "grad_norm": 2.1402716636657715,
      "learning_rate": 5.042234332425069e-07,
      "loss": 1.8661,
      "step": 69730
    },
    {
      "epoch": 95.87027579162411,
      "grad_norm": 2.9710769653320312,
      "learning_rate": 4.0422343324250687e-07,
      "loss": 1.8651,
      "step": 70464
    },
    {
      "epoch": 96.8689138576779,
      "grad_norm": 1.769300103187561,
      "learning_rate": 3.043596730245232e-07,
      "loss": 1.8726,
      "step": 71198
    },
    {
      "epoch": 97.8675519237317,
      "grad_norm": 2.095330238342285,
      "learning_rate": 2.0435967302452318e-07,
      "loss": 1.8691,
      "step": 71932
    },
    {
      "epoch": 98.8661899897855,
      "grad_norm": 2.741286039352417,
      "learning_rate": 1.0435967302452316e-07,
      "loss": 1.8809,
      "step": 72666
    },
    {
      "epoch": 99.8648280558393,
      "grad_norm": 2.270244836807251,
      "learning_rate": 4.49591280653951e-09,
      "loss": 1.8619,
      "step": 73400
    },
    {
      "epoch": 99.8648280558393,
      "step": 73400,
      "total_flos": 3.848614248660486e+18,
      "train_loss": 1.9516082713779377,
      "train_runtime": 79711.1977,
      "train_samples_per_second": 184.18,
      "train_steps_per_second": 0.921
    }
  ],
  "logging_steps": 734,
  "max_steps": 73400,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 100,
  "save_steps": 6800,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.848614248660486e+18,
  "train_batch_size": 50,
  "trial_name": null,
  "trial_params": null
}