{
  "best_metric": 1.1252893209457397,
  "best_model_checkpoint": "miner_id_24/checkpoint-75",
  "epoch": 0.7735368956743003,
  "eval_steps": 25,
  "global_step": 95,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008142493638676845,
      "grad_norm": 0.7951357364654541,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 2.1777,
      "step": 1
    },
    {
      "epoch": 0.008142493638676845,
      "eval_loss": 2.6585562229156494,
      "eval_runtime": 3.2014,
      "eval_samples_per_second": 15.618,
      "eval_steps_per_second": 4.061,
      "step": 1
    },
    {
      "epoch": 0.01628498727735369,
      "grad_norm": 0.920566976070404,
      "learning_rate": 6.666666666666667e-05,
      "loss": 2.1561,
      "step": 2
    },
    {
      "epoch": 0.024427480916030534,
      "grad_norm": 0.9564467668533325,
      "learning_rate": 0.0001,
      "loss": 2.2048,
      "step": 3
    },
    {
      "epoch": 0.03256997455470738,
      "grad_norm": 1.0686429738998413,
      "learning_rate": 9.997376600647783e-05,
      "loss": 2.1696,
      "step": 4
    },
    {
      "epoch": 0.04071246819338423,
      "grad_norm": 1.0367040634155273,
      "learning_rate": 9.989509461357426e-05,
      "loss": 1.8942,
      "step": 5
    },
    {
      "epoch": 0.04885496183206107,
      "grad_norm": 0.8149427175521851,
      "learning_rate": 9.976407754861426e-05,
      "loss": 1.6377,
      "step": 6
    },
    {
      "epoch": 0.056997455470737916,
      "grad_norm": 0.7105250358581543,
      "learning_rate": 9.958086757163489e-05,
      "loss": 1.3655,
      "step": 7
    },
    {
      "epoch": 0.06513994910941476,
      "grad_norm": 0.7786738872528076,
      "learning_rate": 9.934567829727386e-05,
      "loss": 1.3318,
      "step": 8
    },
    {
      "epoch": 0.0732824427480916,
      "grad_norm": 0.6053236126899719,
      "learning_rate": 9.905878394570453e-05,
      "loss": 1.1656,
      "step": 9
    },
    {
      "epoch": 0.08142493638676845,
      "grad_norm": 0.49749094247817993,
      "learning_rate": 9.872051902290737e-05,
      "loss": 1.1655,
      "step": 10
    },
    {
      "epoch": 0.08956743002544529,
      "grad_norm": 0.5606850981712341,
      "learning_rate": 9.833127793065098e-05,
      "loss": 1.0697,
      "step": 11
    },
    {
      "epoch": 0.09770992366412214,
      "grad_norm": 0.6993449330329895,
      "learning_rate": 9.789151450663723e-05,
      "loss": 0.9941,
      "step": 12
    },
    {
      "epoch": 0.10585241730279898,
      "grad_norm": 0.5706828832626343,
      "learning_rate": 9.740174149534693e-05,
      "loss": 1.4403,
      "step": 13
    },
    {
      "epoch": 0.11399491094147583,
      "grad_norm": 0.6802021861076355,
      "learning_rate": 9.686252995020249e-05,
      "loss": 1.4688,
      "step": 14
    },
    {
      "epoch": 0.12213740458015267,
      "grad_norm": 0.5219370722770691,
      "learning_rate": 9.627450856774539e-05,
      "loss": 1.3689,
      "step": 15
    },
    {
      "epoch": 0.13027989821882952,
      "grad_norm": 0.41930851340293884,
      "learning_rate": 9.563836295460398e-05,
      "loss": 1.3572,
      "step": 16
    },
    {
      "epoch": 0.13842239185750635,
      "grad_norm": 0.47524869441986084,
      "learning_rate": 9.495483482810688e-05,
      "loss": 1.3036,
      "step": 17
    },
    {
      "epoch": 0.1465648854961832,
      "grad_norm": 0.417620450258255,
      "learning_rate": 9.422472115147382e-05,
      "loss": 1.2112,
      "step": 18
    },
    {
      "epoch": 0.15470737913486005,
      "grad_norm": 0.5184525847434998,
      "learning_rate": 9.3448873204592e-05,
      "loss": 1.2041,
      "step": 19
    },
    {
      "epoch": 0.1628498727735369,
      "grad_norm": 0.43881309032440186,
      "learning_rate": 9.2628195591462e-05,
      "loss": 1.1282,
      "step": 20
    },
    {
      "epoch": 0.17099236641221374,
      "grad_norm": 0.38495856523513794,
      "learning_rate": 9.176364518546989e-05,
      "loss": 1.1295,
      "step": 21
    },
    {
      "epoch": 0.17913486005089058,
      "grad_norm": 0.3401827812194824,
      "learning_rate": 9.08562300137157e-05,
      "loss": 1.103,
      "step": 22
    },
    {
      "epoch": 0.18727735368956744,
      "grad_norm": 0.3839218020439148,
      "learning_rate": 8.990700808169889e-05,
      "loss": 1.057,
      "step": 23
    },
    {
      "epoch": 0.19541984732824427,
      "grad_norm": 0.42890799045562744,
      "learning_rate": 8.891708613973126e-05,
      "loss": 0.9879,
      "step": 24
    },
    {
      "epoch": 0.2035623409669211,
      "grad_norm": 0.4219408631324768,
      "learning_rate": 8.788761839251559e-05,
      "loss": 0.7933,
      "step": 25
    },
    {
      "epoch": 0.2035623409669211,
      "eval_loss": 1.1750500202178955,
      "eval_runtime": 3.237,
      "eval_samples_per_second": 15.446,
      "eval_steps_per_second": 4.016,
      "step": 25
    },
    {
      "epoch": 0.21170483460559797,
      "grad_norm": 0.5137602090835571,
      "learning_rate": 8.681980515339464e-05,
      "loss": 1.4807,
      "step": 26
    },
    {
      "epoch": 0.2198473282442748,
      "grad_norm": 0.5336816906929016,
      "learning_rate": 8.571489144483944e-05,
      "loss": 1.4194,
      "step": 27
    },
    {
      "epoch": 0.22798982188295167,
      "grad_norm": 0.37356939911842346,
      "learning_rate": 8.457416554680877e-05,
      "loss": 1.3018,
      "step": 28
    },
    {
      "epoch": 0.2361323155216285,
      "grad_norm": 0.34776827692985535,
      "learning_rate": 8.339895749467238e-05,
      "loss": 1.2163,
      "step": 29
    },
    {
      "epoch": 0.24427480916030533,
      "grad_norm": 0.3022593557834625,
      "learning_rate": 8.219063752844926e-05,
      "loss": 1.1786,
      "step": 30
    },
    {
      "epoch": 0.25241730279898217,
      "grad_norm": 0.3063591420650482,
      "learning_rate": 8.095061449516903e-05,
      "loss": 1.1727,
      "step": 31
    },
    {
      "epoch": 0.26055979643765903,
      "grad_norm": 0.3639509677886963,
      "learning_rate": 7.968033420621935e-05,
      "loss": 1.2463,
      "step": 32
    },
    {
      "epoch": 0.2687022900763359,
      "grad_norm": 0.40702012181282043,
      "learning_rate": 7.838127775159452e-05,
      "loss": 1.0818,
      "step": 33
    },
    {
      "epoch": 0.2768447837150127,
      "grad_norm": 0.40854519605636597,
      "learning_rate": 7.705495977301078e-05,
      "loss": 1.0641,
      "step": 34
    },
    {
      "epoch": 0.28498727735368956,
      "grad_norm": 0.43549180030822754,
      "learning_rate": 7.570292669790186e-05,
      "loss": 1.0249,
      "step": 35
    },
    {
      "epoch": 0.2931297709923664,
      "grad_norm": 0.44407615065574646,
      "learning_rate": 7.43267549363537e-05,
      "loss": 1.0178,
      "step": 36
    },
    {
      "epoch": 0.30127226463104323,
      "grad_norm": 0.45777806639671326,
      "learning_rate": 7.292804904308087e-05,
      "loss": 0.8703,
      "step": 37
    },
    {
      "epoch": 0.3094147582697201,
      "grad_norm": 0.30209803581237793,
      "learning_rate": 7.150843984658754e-05,
      "loss": 1.3586,
      "step": 38
    },
    {
      "epoch": 0.31755725190839695,
      "grad_norm": 0.27303484082221985,
      "learning_rate": 7.006958254769438e-05,
      "loss": 1.3765,
      "step": 39
    },
    {
      "epoch": 0.3256997455470738,
      "grad_norm": 0.31225576996803284,
      "learning_rate": 6.861315478964841e-05,
      "loss": 1.3231,
      "step": 40
    },
    {
      "epoch": 0.3338422391857506,
      "grad_norm": 0.3471241891384125,
      "learning_rate": 6.714085470206609e-05,
      "loss": 1.2386,
      "step": 41
    },
    {
      "epoch": 0.3419847328244275,
      "grad_norm": 0.4284417927265167,
      "learning_rate": 6.56543989209901e-05,
      "loss": 1.2152,
      "step": 42
    },
    {
      "epoch": 0.35012722646310435,
      "grad_norm": 0.3678395748138428,
      "learning_rate": 6.415552058736854e-05,
      "loss": 1.1708,
      "step": 43
    },
    {
      "epoch": 0.35826972010178115,
      "grad_norm": 0.3918590247631073,
      "learning_rate": 6.264596732629e-05,
      "loss": 1.2062,
      "step": 44
    },
    {
      "epoch": 0.366412213740458,
      "grad_norm": 0.30557262897491455,
      "learning_rate": 6.112749920933111e-05,
      "loss": 1.115,
      "step": 45
    },
    {
      "epoch": 0.3745547073791349,
      "grad_norm": 0.3059096336364746,
      "learning_rate": 5.960188670239154e-05,
      "loss": 1.0529,
      "step": 46
    },
    {
      "epoch": 0.3826972010178117,
      "grad_norm": 0.3168966770172119,
      "learning_rate": 5.80709086014102e-05,
      "loss": 1.0206,
      "step": 47
    },
    {
      "epoch": 0.39083969465648855,
      "grad_norm": 0.3171919584274292,
      "learning_rate": 5.653634995836856e-05,
      "loss": 0.9549,
      "step": 48
    },
    {
      "epoch": 0.3989821882951654,
      "grad_norm": 0.3663451373577118,
      "learning_rate": 5.500000000000001e-05,
      "loss": 0.8722,
      "step": 49
    },
    {
      "epoch": 0.4071246819338422,
      "grad_norm": 0.3894191384315491,
      "learning_rate": 5.346365004163145e-05,
      "loss": 0.8173,
      "step": 50
    },
    {
      "epoch": 0.4071246819338422,
      "eval_loss": 1.1385380029678345,
      "eval_runtime": 3.2577,
      "eval_samples_per_second": 15.348,
      "eval_steps_per_second": 3.99,
      "step": 50
    },
    {
      "epoch": 0.4152671755725191,
      "grad_norm": 0.25706854462623596,
      "learning_rate": 5.192909139858981e-05,
      "loss": 1.3684,
      "step": 51
    },
    {
      "epoch": 0.42340966921119594,
      "grad_norm": 0.2687229514122009,
      "learning_rate": 5.0398113297608465e-05,
      "loss": 1.2704,
      "step": 52
    },
    {
      "epoch": 0.4315521628498728,
      "grad_norm": 0.2694632411003113,
      "learning_rate": 4.887250079066892e-05,
      "loss": 1.2929,
      "step": 53
    },
    {
      "epoch": 0.4396946564885496,
      "grad_norm": 0.27409684658050537,
      "learning_rate": 4.7354032673710005e-05,
      "loss": 1.1768,
      "step": 54
    },
    {
      "epoch": 0.44783715012722647,
      "grad_norm": 0.2727748453617096,
      "learning_rate": 4.584447941263149e-05,
      "loss": 1.1819,
      "step": 55
    },
    {
      "epoch": 0.45597964376590333,
      "grad_norm": 0.29984185099601746,
      "learning_rate": 4.43456010790099e-05,
      "loss": 1.1877,
      "step": 56
    },
    {
      "epoch": 0.46412213740458014,
      "grad_norm": 0.2994973957538605,
      "learning_rate": 4.285914529793391e-05,
      "loss": 1.1334,
      "step": 57
    },
    {
      "epoch": 0.472264631043257,
      "grad_norm": 0.3209475576877594,
      "learning_rate": 4.13868452103516e-05,
      "loss": 1.1666,
      "step": 58
    },
    {
      "epoch": 0.48040712468193386,
      "grad_norm": 0.317921906709671,
      "learning_rate": 3.9930417452305626e-05,
      "loss": 1.029,
      "step": 59
    },
    {
      "epoch": 0.48854961832061067,
      "grad_norm": 0.32125380635261536,
      "learning_rate": 3.8491560153412466e-05,
      "loss": 0.9528,
      "step": 60
    },
    {
      "epoch": 0.49669211195928753,
      "grad_norm": 0.30410146713256836,
      "learning_rate": 3.707195095691913e-05,
      "loss": 0.9018,
      "step": 61
    },
    {
      "epoch": 0.5048346055979643,
      "grad_norm": 0.39632368087768555,
      "learning_rate": 3.567324506364632e-05,
      "loss": 0.9189,
      "step": 62
    },
    {
      "epoch": 0.5129770992366413,
      "grad_norm": 0.28354790806770325,
      "learning_rate": 3.4297073302098156e-05,
      "loss": 1.2053,
      "step": 63
    },
    {
      "epoch": 0.5211195928753181,
      "grad_norm": 0.23840811848640442,
      "learning_rate": 3.2945040226989244e-05,
      "loss": 1.3452,
      "step": 64
    },
    {
      "epoch": 0.5292620865139949,
      "grad_norm": 0.2518406808376312,
      "learning_rate": 3.16187222484055e-05,
      "loss": 1.2796,
      "step": 65
    },
    {
      "epoch": 0.5374045801526718,
      "grad_norm": 0.3083972632884979,
      "learning_rate": 3.0319665793780648e-05,
      "loss": 1.2915,
      "step": 66
    },
    {
      "epoch": 0.5455470737913486,
      "grad_norm": 0.28872764110565186,
      "learning_rate": 2.9049385504830985e-05,
      "loss": 1.2188,
      "step": 67
    },
    {
      "epoch": 0.5536895674300254,
      "grad_norm": 0.281512588262558,
      "learning_rate": 2.7809362471550748e-05,
      "loss": 1.1432,
      "step": 68
    },
    {
      "epoch": 0.5618320610687023,
      "grad_norm": 0.2897692918777466,
      "learning_rate": 2.660104250532764e-05,
      "loss": 1.197,
      "step": 69
    },
    {
      "epoch": 0.5699745547073791,
      "grad_norm": 0.28706327080726624,
      "learning_rate": 2.5425834453191232e-05,
      "loss": 1.065,
      "step": 70
    },
    {
      "epoch": 0.5781170483460559,
      "grad_norm": 0.30318784713745117,
      "learning_rate": 2.4285108555160577e-05,
      "loss": 0.9742,
      "step": 71
    },
    {
      "epoch": 0.5862595419847328,
      "grad_norm": 0.310880571603775,
      "learning_rate": 2.3180194846605367e-05,
      "loss": 0.9485,
      "step": 72
    },
    {
      "epoch": 0.5944020356234097,
      "grad_norm": 0.33773961663246155,
      "learning_rate": 2.2112381607484417e-05,
      "loss": 0.98,
      "step": 73
    },
    {
      "epoch": 0.6025445292620865,
      "grad_norm": 0.35677966475486755,
      "learning_rate": 2.1082913860268765e-05,
      "loss": 0.8608,
      "step": 74
    },
    {
      "epoch": 0.6106870229007634,
      "grad_norm": 0.38845735788345337,
      "learning_rate": 2.0092991918301108e-05,
      "loss": 0.787,
      "step": 75
    },
    {
      "epoch": 0.6106870229007634,
      "eval_loss": 1.1252893209457397,
      "eval_runtime": 3.2469,
      "eval_samples_per_second": 15.399,
      "eval_steps_per_second": 4.004,
      "step": 75
    },
    {
      "epoch": 0.6188295165394402,
      "grad_norm": 0.2565176784992218,
      "learning_rate": 1.91437699862843e-05,
      "loss": 1.331,
      "step": 76
    },
    {
      "epoch": 0.6269720101781171,
      "grad_norm": 0.2451658397912979,
      "learning_rate": 1.8236354814530112e-05,
      "loss": 1.2461,
      "step": 77
    },
    {
      "epoch": 0.6351145038167939,
      "grad_norm": 0.2756778597831726,
      "learning_rate": 1.7371804408538024e-05,
      "loss": 1.2637,
      "step": 78
    },
    {
      "epoch": 0.6432569974554707,
      "grad_norm": 0.2695421874523163,
      "learning_rate": 1.6551126795408016e-05,
      "loss": 1.2262,
      "step": 79
    },
    {
      "epoch": 0.6513994910941476,
      "grad_norm": 0.288644403219223,
      "learning_rate": 1.577527884852619e-05,
      "loss": 1.1969,
      "step": 80
    },
    {
      "epoch": 0.6595419847328244,
      "grad_norm": 0.29181233048439026,
      "learning_rate": 1.5045165171893116e-05,
      "loss": 1.163,
      "step": 81
    },
    {
      "epoch": 0.6676844783715012,
      "grad_norm": 0.2998856008052826,
      "learning_rate": 1.4361637045396029e-05,
      "loss": 1.0569,
      "step": 82
    },
    {
      "epoch": 0.6758269720101782,
      "grad_norm": 0.3256518244743347,
      "learning_rate": 1.3725491432254624e-05,
      "loss": 1.0537,
      "step": 83
    },
    {
      "epoch": 0.683969465648855,
      "grad_norm": 0.29111307859420776,
      "learning_rate": 1.313747004979751e-05,
      "loss": 1.0298,
      "step": 84
    },
    {
      "epoch": 0.6921119592875318,
      "grad_norm": 0.32174497842788696,
      "learning_rate": 1.2598258504653081e-05,
      "loss": 0.9976,
      "step": 85
    },
    {
      "epoch": 0.7002544529262087,
      "grad_norm": 0.36173349618911743,
      "learning_rate": 1.2108485493362765e-05,
      "loss": 0.9361,
      "step": 86
    },
    {
      "epoch": 0.7083969465648855,
      "grad_norm": 0.3511975407600403,
      "learning_rate": 1.1668722069349041e-05,
      "loss": 0.8343,
      "step": 87
    },
    {
      "epoch": 0.7165394402035623,
      "grad_norm": 0.2947863042354584,
      "learning_rate": 1.1279480977092635e-05,
      "loss": 1.3039,
      "step": 88
    },
    {
      "epoch": 0.7246819338422392,
      "grad_norm": 0.2641090154647827,
      "learning_rate": 1.094121605429547e-05,
      "loss": 1.2791,
      "step": 89
    },
    {
      "epoch": 0.732824427480916,
      "grad_norm": 0.2599228024482727,
      "learning_rate": 1.0654321702726141e-05,
      "loss": 1.2946,
      "step": 90
    },
    {
      "epoch": 0.7409669211195928,
      "grad_norm": 0.2754274308681488,
      "learning_rate": 1.0419132428365116e-05,
      "loss": 1.2478,
      "step": 91
    },
    {
      "epoch": 0.7491094147582698,
      "grad_norm": 0.2846546173095703,
      "learning_rate": 1.0235922451385733e-05,
      "loss": 1.2817,
      "step": 92
    },
    {
      "epoch": 0.7572519083969466,
      "grad_norm": 0.29695919156074524,
      "learning_rate": 1.0104905386425733e-05,
      "loss": 1.2161,
      "step": 93
    },
    {
      "epoch": 0.7653944020356234,
      "grad_norm": 0.29593130946159363,
      "learning_rate": 1.002623399352217e-05,
      "loss": 1.1682,
      "step": 94
    },
    {
      "epoch": 0.7735368956743003,
      "grad_norm": 0.2888301908969879,
      "learning_rate": 1e-05,
      "loss": 1.0711,
      "step": 95
    }
  ],
  "logging_steps": 1,
  "max_steps": 95,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0685709840233267e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}