{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 99.48249027237354,
  "eval_steps": 500,
  "global_step": 19200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9961089494163424,
      "grad_norm": 0.9873842597007751,
      "learning_rate": 9.901562500000001e-06,
      "loss": 2.5953,
      "step": 192
    },
    {
      "epoch": 1.9909208819714657,
      "grad_norm": 1.439842700958252,
      "learning_rate": 9.8015625e-06,
      "loss": 2.462,
      "step": 384
    },
    {
      "epoch": 2.985732814526589,
      "grad_norm": 2.307729482650757,
      "learning_rate": 9.701562500000001e-06,
      "loss": 2.3659,
      "step": 576
    },
    {
      "epoch": 3.980544747081712,
      "grad_norm": 4.006823539733887,
      "learning_rate": 9.6015625e-06,
      "loss": 2.3475,
      "step": 768
    },
    {
      "epoch": 4.975356679636835,
      "grad_norm": 2.971815824508667,
      "learning_rate": 9.501562500000001e-06,
      "loss": 2.3162,
      "step": 960
    },
    {
      "epoch": 5.970168612191959,
      "grad_norm": 3.6012327671051025,
      "learning_rate": 9.4015625e-06,
      "loss": 2.3049,
      "step": 1152
    },
    {
      "epoch": 6.964980544747082,
      "grad_norm": 3.803527355194092,
      "learning_rate": 9.301562500000002e-06,
      "loss": 2.2733,
      "step": 1344
    },
    {
      "epoch": 7.959792477302205,
      "grad_norm": 3.0761234760284424,
      "learning_rate": 9.201562500000001e-06,
      "loss": 2.2576,
      "step": 1536
    },
    {
      "epoch": 8.954604409857328,
      "grad_norm": 2.6708500385284424,
      "learning_rate": 9.1015625e-06,
      "loss": 2.2367,
      "step": 1728
    },
    {
      "epoch": 9.949416342412452,
      "grad_norm": 3.482462167739868,
      "learning_rate": 9.001562500000001e-06,
      "loss": 2.176,
      "step": 1920
    },
    {
      "epoch": 10.944228274967575,
      "grad_norm": 2.826169967651367,
      "learning_rate": 8.902083333333334e-06,
      "loss": 2.2051,
      "step": 2112
    },
    {
      "epoch": 11.939040207522698,
      "grad_norm": 3.164243698120117,
      "learning_rate": 8.802083333333335e-06,
      "loss": 2.1874,
      "step": 2304
    },
    {
      "epoch": 12.933852140077821,
      "grad_norm": 4.725254058837891,
      "learning_rate": 8.702083333333334e-06,
      "loss": 2.1809,
      "step": 2496
    },
    {
      "epoch": 13.928664072632944,
      "grad_norm": 3.7806782722473145,
      "learning_rate": 8.602083333333333e-06,
      "loss": 2.16,
      "step": 2688
    },
    {
      "epoch": 14.923476005188068,
      "grad_norm": 3.735746145248413,
      "learning_rate": 8.502083333333334e-06,
      "loss": 2.1663,
      "step": 2880
    },
    {
      "epoch": 15.918287937743191,
      "grad_norm": 3.401533365249634,
      "learning_rate": 8.402083333333334e-06,
      "loss": 2.1353,
      "step": 3072
    },
    {
      "epoch": 16.913099870298314,
      "grad_norm": 3.1076550483703613,
      "learning_rate": 8.302083333333335e-06,
      "loss": 2.1277,
      "step": 3264
    },
    {
      "epoch": 17.907911802853437,
      "grad_norm": 3.2938179969787598,
      "learning_rate": 8.202083333333334e-06,
      "loss": 2.1165,
      "step": 3456
    },
    {
      "epoch": 18.90272373540856,
      "grad_norm": 3.4992620944976807,
      "learning_rate": 8.102083333333333e-06,
      "loss": 2.1407,
      "step": 3648
    },
    {
      "epoch": 19.897535667963684,
      "grad_norm": 5.144476413726807,
      "learning_rate": 8.002083333333334e-06,
      "loss": 2.1124,
      "step": 3840
    },
    {
      "epoch": 20.892347600518807,
      "grad_norm": 3.369130849838257,
      "learning_rate": 7.902083333333333e-06,
      "loss": 2.0994,
      "step": 4032
    },
    {
      "epoch": 21.88715953307393,
      "grad_norm": 2.5219101905822754,
      "learning_rate": 7.802604166666668e-06,
      "loss": 2.086,
      "step": 4224
    },
    {
      "epoch": 22.881971465629054,
      "grad_norm": 2.416583776473999,
      "learning_rate": 7.702604166666667e-06,
      "loss": 2.1011,
      "step": 4416
    },
    {
      "epoch": 23.876783398184177,
      "grad_norm": 2.880645513534546,
      "learning_rate": 7.602604166666667e-06,
      "loss": 2.069,
      "step": 4608
    },
    {
      "epoch": 24.8715953307393,
      "grad_norm": 3.6908700466156006,
      "learning_rate": 7.5026041666666675e-06,
      "loss": 2.0878,
      "step": 4800
    },
    {
      "epoch": 25.866407263294423,
      "grad_norm": 3.9834940433502197,
      "learning_rate": 7.402604166666667e-06,
      "loss": 2.0832,
      "step": 4992
    },
    {
      "epoch": 26.861219195849547,
      "grad_norm": 4.434403896331787,
      "learning_rate": 7.302604166666667e-06,
      "loss": 2.0618,
      "step": 5184
    },
    {
      "epoch": 27.85603112840467,
      "grad_norm": 2.6179683208465576,
      "learning_rate": 7.202604166666667e-06,
      "loss": 2.0698,
      "step": 5376
    },
    {
      "epoch": 28.850843060959793,
      "grad_norm": 3.7101306915283203,
      "learning_rate": 7.102604166666667e-06,
      "loss": 2.0561,
      "step": 5568
    },
    {
      "epoch": 29.845654993514916,
      "grad_norm": 2.940573215484619,
      "learning_rate": 7.002604166666668e-06,
      "loss": 2.0499,
      "step": 5760
    },
    {
      "epoch": 30.84046692607004,
      "grad_norm": 3.1798958778381348,
      "learning_rate": 6.902604166666667e-06,
      "loss": 2.049,
      "step": 5952
    },
    {
      "epoch": 31.835278858625163,
      "grad_norm": 3.288163423538208,
      "learning_rate": 6.802604166666667e-06,
      "loss": 2.0326,
      "step": 6144
    },
    {
      "epoch": 32.83009079118028,
      "grad_norm": 2.8764078617095947,
      "learning_rate": 6.703125e-06,
      "loss": 2.0371,
      "step": 6336
    },
    {
      "epoch": 33.824902723735406,
      "grad_norm": 2.9869441986083984,
      "learning_rate": 6.603125e-06,
      "loss": 2.0611,
      "step": 6528
    },
    {
      "epoch": 34.81971465629053,
      "grad_norm": 3.1720468997955322,
      "learning_rate": 6.5031250000000005e-06,
      "loss": 2.0541,
      "step": 6720
    },
    {
      "epoch": 35.81452658884565,
      "grad_norm": 3.7449281215667725,
      "learning_rate": 6.403125e-06,
      "loss": 2.0177,
      "step": 6912
    },
    {
      "epoch": 36.809338521400775,
      "grad_norm": 2.036007881164551,
      "learning_rate": 6.303125000000001e-06,
      "loss": 2.006,
      "step": 7104
    },
    {
      "epoch": 37.8041504539559,
      "grad_norm": 2.677340507507324,
      "learning_rate": 6.203125000000001e-06,
      "loss": 2.0329,
      "step": 7296
    },
    {
      "epoch": 38.79896238651102,
      "grad_norm": 2.8975298404693604,
      "learning_rate": 6.103125000000001e-06,
      "loss": 2.0347,
      "step": 7488
    },
    {
      "epoch": 39.793774319066145,
      "grad_norm": 1.8516769409179688,
      "learning_rate": 6.003125000000001e-06,
      "loss": 2.0179,
      "step": 7680
    },
    {
      "epoch": 40.78858625162127,
      "grad_norm": 2.984468698501587,
      "learning_rate": 5.903125e-06,
      "loss": 2.0002,
      "step": 7872
    },
    {
      "epoch": 41.78339818417639,
      "grad_norm": 4.378921031951904,
      "learning_rate": 5.8031250000000004e-06,
      "loss": 2.0032,
      "step": 8064
    },
    {
      "epoch": 42.778210116731515,
      "grad_norm": 3.600853204727173,
      "learning_rate": 5.7031250000000006e-06,
      "loss": 2.0131,
      "step": 8256
    },
    {
      "epoch": 43.77302204928664,
      "grad_norm": 3.475604295730591,
      "learning_rate": 5.603125000000001e-06,
      "loss": 2.0167,
      "step": 8448
    },
    {
      "epoch": 44.76783398184176,
      "grad_norm": 3.7388381958007812,
      "learning_rate": 5.503125e-06,
      "loss": 2.0286,
      "step": 8640
    },
    {
      "epoch": 45.762645914396884,
      "grad_norm": 3.0160775184631348,
      "learning_rate": 5.403125e-06,
      "loss": 2.0327,
      "step": 8832
    },
    {
      "epoch": 46.75745784695201,
      "grad_norm": 2.4288928508758545,
      "learning_rate": 5.303645833333334e-06,
      "loss": 1.9837,
      "step": 9024
    },
    {
      "epoch": 47.75226977950713,
      "grad_norm": 2.6072142124176025,
      "learning_rate": 5.203645833333334e-06,
      "loss": 2.022,
      "step": 9216
    },
    {
      "epoch": 48.747081712062254,
      "grad_norm": 3.0784308910369873,
      "learning_rate": 5.103645833333334e-06,
      "loss": 1.9946,
      "step": 9408
    },
    {
      "epoch": 49.74189364461738,
      "grad_norm": 2.552196979522705,
      "learning_rate": 5.003645833333334e-06,
      "loss": 1.9857,
      "step": 9600
    },
    {
      "epoch": 50.7367055771725,
      "grad_norm": 5.163851261138916,
      "learning_rate": 4.903645833333333e-06,
      "loss": 2.0038,
      "step": 9792
    },
    {
      "epoch": 51.731517509727624,
      "grad_norm": 3.9303905963897705,
      "learning_rate": 4.8036458333333335e-06,
      "loss": 2.0327,
      "step": 9984
    },
    {
      "epoch": 52.72632944228275,
      "grad_norm": 4.269293308258057,
      "learning_rate": 4.703645833333334e-06,
      "loss": 1.978,
      "step": 10176
    },
    {
      "epoch": 53.72114137483787,
      "grad_norm": 3.4755337238311768,
      "learning_rate": 4.603645833333334e-06,
      "loss": 2.0154,
      "step": 10368
    },
    {
      "epoch": 54.715953307392994,
      "grad_norm": 2.2201080322265625,
      "learning_rate": 4.503645833333334e-06,
      "loss": 1.9945,
      "step": 10560
    },
    {
      "epoch": 55.71076523994812,
      "grad_norm": 3.3768227100372314,
      "learning_rate": 4.403645833333334e-06,
      "loss": 1.9764,
      "step": 10752
    },
    {
      "epoch": 56.70557717250324,
      "grad_norm": 2.9863104820251465,
      "learning_rate": 4.303645833333334e-06,
      "loss": 2.0031,
      "step": 10944
    },
    {
      "epoch": 57.70038910505836,
      "grad_norm": 3.351330280303955,
      "learning_rate": 4.203645833333333e-06,
      "loss": 1.9915,
      "step": 11136
    },
    {
      "epoch": 58.69520103761349,
      "grad_norm": 2.745793104171753,
      "learning_rate": 4.103645833333333e-06,
      "loss": 2.0054,
      "step": 11328
    },
    {
      "epoch": 59.69001297016861,
      "grad_norm": 4.333703517913818,
      "learning_rate": 4.0036458333333335e-06,
      "loss": 1.9629,
      "step": 11520
    },
    {
      "epoch": 60.68482490272373,
      "grad_norm": 3.1686012744903564,
      "learning_rate": 3.903645833333334e-06,
      "loss": 2.0011,
      "step": 11712
    },
    {
      "epoch": 61.679636835278856,
      "grad_norm": 2.1179568767547607,
      "learning_rate": 3.804166666666667e-06,
      "loss": 1.9949,
      "step": 11904
    },
    {
      "epoch": 62.67444876783398,
      "grad_norm": 2.4998276233673096,
      "learning_rate": 3.704166666666667e-06,
      "loss": 1.9791,
      "step": 12096
    },
    {
      "epoch": 63.6692607003891,
      "grad_norm": 3.710357189178467,
      "learning_rate": 3.6041666666666667e-06,
      "loss": 1.9757,
      "step": 12288
    },
    {
      "epoch": 64.66407263294423,
      "grad_norm": 3.4601991176605225,
      "learning_rate": 3.504166666666667e-06,
      "loss": 2.025,
      "step": 12480
    },
    {
      "epoch": 65.65888456549935,
      "grad_norm": 2.626007318496704,
      "learning_rate": 3.4041666666666665e-06,
      "loss": 1.9755,
      "step": 12672
    },
    {
      "epoch": 66.65369649805447,
      "grad_norm": 3.1454813480377197,
      "learning_rate": 3.304166666666667e-06,
      "loss": 1.9984,
      "step": 12864
    },
    {
      "epoch": 67.6485084306096,
      "grad_norm": 7.283568859100342,
      "learning_rate": 3.204166666666667e-06,
      "loss": 1.9776,
      "step": 13056
    },
    {
      "epoch": 68.64332036316472,
      "grad_norm": 2.031538724899292,
      "learning_rate": 3.104166666666667e-06,
      "loss": 1.9939,
      "step": 13248
    },
    {
      "epoch": 69.63813229571984,
      "grad_norm": 2.370424270629883,
      "learning_rate": 3.004166666666667e-06,
      "loss": 1.9805,
      "step": 13440
    },
    {
      "epoch": 70.63294422827497,
      "grad_norm": 3.6357874870300293,
      "learning_rate": 2.9041666666666667e-06,
      "loss": 1.9998,
      "step": 13632
    },
    {
      "epoch": 71.62775616083009,
      "grad_norm": 3.61807918548584,
      "learning_rate": 2.8041666666666668e-06,
      "loss": 1.9615,
      "step": 13824
    },
    {
      "epoch": 72.62256809338521,
      "grad_norm": 2.440490484237671,
      "learning_rate": 2.7041666666666673e-06,
      "loss": 1.9715,
      "step": 14016
    },
    {
      "epoch": 73.61738002594034,
      "grad_norm": 2.490007162094116,
      "learning_rate": 2.6046875000000006e-06,
      "loss": 1.9765,
      "step": 14208
    },
    {
      "epoch": 74.61219195849546,
      "grad_norm": 2.368825674057007,
      "learning_rate": 2.5046875000000003e-06,
      "loss": 1.9819,
      "step": 14400
    },
    {
      "epoch": 75.60700389105058,
      "grad_norm": 2.9740967750549316,
      "learning_rate": 2.4046875000000004e-06,
      "loss": 1.9702,
      "step": 14592
    },
    {
      "epoch": 76.6018158236057,
      "grad_norm": 3.215397834777832,
      "learning_rate": 2.3046875e-06,
      "loss": 1.9913,
      "step": 14784
    },
    {
      "epoch": 77.59662775616083,
      "grad_norm": 2.465824604034424,
      "learning_rate": 2.2046875000000002e-06,
      "loss": 1.9751,
      "step": 14976
    },
    {
      "epoch": 78.59143968871595,
      "grad_norm": 2.2337405681610107,
      "learning_rate": 2.1046875000000003e-06,
      "loss": 1.9496,
      "step": 15168
    },
    {
      "epoch": 79.58625162127107,
      "grad_norm": 2.875598907470703,
      "learning_rate": 2.0046875e-06,
      "loss": 2.0066,
      "step": 15360
    },
    {
      "epoch": 80.5810635538262,
      "grad_norm": 2.8990750312805176,
      "learning_rate": 1.9046875000000001e-06,
      "loss": 1.9466,
      "step": 15552
    },
    {
      "epoch": 81.57587548638132,
      "grad_norm": 4.734499454498291,
      "learning_rate": 1.8046875000000002e-06,
      "loss": 1.9636,
      "step": 15744
    },
    {
      "epoch": 82.57068741893644,
      "grad_norm": 2.9932515621185303,
      "learning_rate": 1.7046875000000001e-06,
      "loss": 1.973,
      "step": 15936
    },
    {
      "epoch": 83.56549935149157,
      "grad_norm": 4.040909767150879,
      "learning_rate": 1.6046875e-06,
      "loss": 1.9678,
      "step": 16128
    },
    {
      "epoch": 84.56031128404669,
      "grad_norm": 3.0035159587860107,
      "learning_rate": 1.5046875000000002e-06,
      "loss": 1.9457,
      "step": 16320
    },
    {
      "epoch": 85.55512321660181,
      "grad_norm": 2.5469093322753906,
      "learning_rate": 1.4046875e-06,
      "loss": 1.9642,
      "step": 16512
    },
    {
      "epoch": 86.54993514915694,
      "grad_norm": 2.850858688354492,
      "learning_rate": 1.3046875e-06,
      "loss": 1.9638,
      "step": 16704
    },
    {
      "epoch": 87.54474708171206,
      "grad_norm": 2.5728836059570312,
      "learning_rate": 1.2046875e-06,
      "loss": 1.9681,
      "step": 16896
    },
    {
      "epoch": 88.53955901426718,
      "grad_norm": 1.4205690622329712,
      "learning_rate": 1.1046875000000002e-06,
      "loss": 1.9585,
      "step": 17088
    },
    {
      "epoch": 89.5343709468223,
      "grad_norm": 1.9591172933578491,
      "learning_rate": 1.0046875e-06,
      "loss": 1.9894,
      "step": 17280
    },
    {
      "epoch": 90.52918287937743,
      "grad_norm": 3.981717348098755,
      "learning_rate": 9.046875000000001e-07,
      "loss": 1.9388,
      "step": 17472
    },
    {
      "epoch": 91.52399481193255,
      "grad_norm": 2.348127841949463,
      "learning_rate": 8.046875000000001e-07,
      "loss": 1.9677,
      "step": 17664
    },
    {
      "epoch": 92.51880674448768,
      "grad_norm": 3.0600669384002686,
      "learning_rate": 7.046875e-07,
      "loss": 1.9747,
      "step": 17856
    },
    {
      "epoch": 93.5136186770428,
      "grad_norm": 3.865741491317749,
      "learning_rate": 6.046875000000001e-07,
      "loss": 1.958,
      "step": 18048
    },
    {
      "epoch": 94.50843060959792,
      "grad_norm": 2.755720853805542,
      "learning_rate": 5.052083333333334e-07,
      "loss": 1.975,
      "step": 18240
    },
    {
      "epoch": 95.50324254215305,
      "grad_norm": 3.4946985244750977,
      "learning_rate": 4.0520833333333335e-07,
      "loss": 1.9426,
      "step": 18432
    },
    {
      "epoch": 96.49805447470817,
      "grad_norm": 2.993678331375122,
      "learning_rate": 3.0520833333333336e-07,
      "loss": 1.9497,
      "step": 18624
    },
    {
      "epoch": 97.49286640726329,
      "grad_norm": 3.959327459335327,
      "learning_rate": 2.0520833333333334e-07,
      "loss": 1.9409,
      "step": 18816
    },
    {
      "epoch": 98.48767833981842,
      "grad_norm": 2.147462844848633,
      "learning_rate": 1.0520833333333334e-07,
      "loss": 1.9485,
      "step": 19008
    },
    {
      "epoch": 99.48249027237354,
      "grad_norm": 2.728996992111206,
      "learning_rate": 5.208333333333334e-09,
      "loss": 1.981,
      "step": 19200
    },
    {
      "epoch": 99.48249027237354,
      "step": 19200,
      "total_flos": 1.0064407234989773e+18,
      "train_loss": 2.0492228651046753,
      "train_runtime": 20851.1432,
      "train_samples_per_second": 184.834,
      "train_steps_per_second": 0.921
    }
  ],
  "logging_steps": 192,
  "max_steps": 19200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 100,
  "save_steps": 6800,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0064407234989773e+18,
  "train_batch_size": 50,
  "trial_name": null,
  "trial_params": null
}