|
{
  "best_metric": 0.907011091709137,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.007006358270130143,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 3.5031791350650716e-05,
      "grad_norm": 12.662116050720215,
      "learning_rate": 1.008e-05,
      "loss": 2.7705,
      "step": 1
    },
    {
      "epoch": 3.5031791350650716e-05,
      "eval_loss": 3.4625937938690186,
      "eval_runtime": 2057.3933,
      "eval_samples_per_second": 5.842,
      "eval_steps_per_second": 1.461,
      "step": 1
    },
    {
      "epoch": 7.006358270130143e-05,
      "grad_norm": 16.384368896484375,
      "learning_rate": 2.016e-05,
      "loss": 3.3597,
      "step": 2
    },
    {
      "epoch": 0.00010509537405195214,
      "grad_norm": 20.63599395751953,
      "learning_rate": 3.024e-05,
      "loss": 4.0404,
      "step": 3
    },
    {
      "epoch": 0.00014012716540260286,
      "grad_norm": 13.627049446105957,
      "learning_rate": 4.032e-05,
      "loss": 2.6582,
      "step": 4
    },
    {
      "epoch": 0.00017515895675325357,
      "grad_norm": 14.674355506896973,
      "learning_rate": 5.04e-05,
      "loss": 3.0364,
      "step": 5
    },
    {
      "epoch": 0.00021019074810390428,
      "grad_norm": 12.088839530944824,
      "learning_rate": 6.048e-05,
      "loss": 2.7996,
      "step": 6
    },
    {
      "epoch": 0.000245222539454555,
      "grad_norm": 13.726351737976074,
      "learning_rate": 7.055999999999999e-05,
      "loss": 2.8547,
      "step": 7
    },
    {
      "epoch": 0.00028025433080520573,
      "grad_norm": 9.592828750610352,
      "learning_rate": 8.064e-05,
      "loss": 1.8008,
      "step": 8
    },
    {
      "epoch": 0.00031528612215585646,
      "grad_norm": 19.422744750976562,
      "learning_rate": 9.072e-05,
      "loss": 2.4474,
      "step": 9
    },
    {
      "epoch": 0.00035031791350650715,
      "grad_norm": 14.529265403747559,
      "learning_rate": 0.0001008,
      "loss": 2.1343,
      "step": 10
    },
    {
      "epoch": 0.0003853497048571579,
      "grad_norm": 12.239635467529297,
      "learning_rate": 0.00010026947368421052,
      "loss": 2.4732,
      "step": 11
    },
    {
      "epoch": 0.00042038149620780857,
      "grad_norm": 9.195387840270996,
      "learning_rate": 9.973894736842104e-05,
      "loss": 1.5237,
      "step": 12
    },
    {
      "epoch": 0.0004554132875584593,
      "grad_norm": 9.380248069763184,
      "learning_rate": 9.920842105263157e-05,
      "loss": 1.9376,
      "step": 13
    },
    {
      "epoch": 0.00049044507890911,
      "grad_norm": 10.060378074645996,
      "learning_rate": 9.86778947368421e-05,
      "loss": 2.0215,
      "step": 14
    },
    {
      "epoch": 0.0005254768702597607,
      "grad_norm": 9.508438110351562,
      "learning_rate": 9.814736842105264e-05,
      "loss": 1.7224,
      "step": 15
    },
    {
      "epoch": 0.0005605086616104115,
      "grad_norm": 10.150548934936523,
      "learning_rate": 9.761684210526316e-05,
      "loss": 1.8087,
      "step": 16
    },
    {
      "epoch": 0.0005955404529610622,
      "grad_norm": 11.015765190124512,
      "learning_rate": 9.708631578947368e-05,
      "loss": 1.4277,
      "step": 17
    },
    {
      "epoch": 0.0006305722443117129,
      "grad_norm": 10.192193031311035,
      "learning_rate": 9.655578947368421e-05,
      "loss": 1.3218,
      "step": 18
    },
    {
      "epoch": 0.0006656040356623636,
      "grad_norm": 9.296463012695312,
      "learning_rate": 9.602526315789473e-05,
      "loss": 1.6145,
      "step": 19
    },
    {
      "epoch": 0.0007006358270130143,
      "grad_norm": 10.011585235595703,
      "learning_rate": 9.549473684210525e-05,
      "loss": 1.5533,
      "step": 20
    },
    {
      "epoch": 0.000735667618363665,
      "grad_norm": 10.020069122314453,
      "learning_rate": 9.496421052631579e-05,
      "loss": 1.6376,
      "step": 21
    },
    {
      "epoch": 0.0007706994097143158,
      "grad_norm": 7.785070896148682,
      "learning_rate": 9.443368421052631e-05,
      "loss": 1.3359,
      "step": 22
    },
    {
      "epoch": 0.0008057312010649665,
      "grad_norm": 6.168704509735107,
      "learning_rate": 9.390315789473683e-05,
      "loss": 1.0551,
      "step": 23
    },
    {
      "epoch": 0.0008407629924156171,
      "grad_norm": 5.922638416290283,
      "learning_rate": 9.337263157894737e-05,
      "loss": 0.9874,
      "step": 24
    },
    {
      "epoch": 0.0008757947837662679,
      "grad_norm": 5.987689018249512,
      "learning_rate": 9.28421052631579e-05,
      "loss": 0.6966,
      "step": 25
    },
    {
      "epoch": 0.0009108265751169186,
      "grad_norm": 6.146247386932373,
      "learning_rate": 9.231157894736842e-05,
      "loss": 1.022,
      "step": 26
    },
    {
      "epoch": 0.0009458583664675693,
      "grad_norm": 5.5729079246521,
      "learning_rate": 9.178105263157895e-05,
      "loss": 1.0427,
      "step": 27
    },
    {
      "epoch": 0.00098089015781822,
      "grad_norm": 8.170185089111328,
      "learning_rate": 9.125052631578948e-05,
      "loss": 1.6254,
      "step": 28
    },
    {
      "epoch": 0.0010159219491688708,
      "grad_norm": 4.396035194396973,
      "learning_rate": 9.072e-05,
      "loss": 0.733,
      "step": 29
    },
    {
      "epoch": 0.0010509537405195214,
      "grad_norm": 6.016524791717529,
      "learning_rate": 9.018947368421052e-05,
      "loss": 1.0773,
      "step": 30
    },
    {
      "epoch": 0.0010859855318701723,
      "grad_norm": 5.453558444976807,
      "learning_rate": 8.965894736842104e-05,
      "loss": 0.8922,
      "step": 31
    },
    {
      "epoch": 0.001121017323220823,
      "grad_norm": 6.520124435424805,
      "learning_rate": 8.912842105263157e-05,
      "loss": 0.7138,
      "step": 32
    },
    {
      "epoch": 0.0011560491145714735,
      "grad_norm": 5.270682334899902,
      "learning_rate": 8.85978947368421e-05,
      "loss": 0.705,
      "step": 33
    },
    {
      "epoch": 0.0011910809059221244,
      "grad_norm": 6.4010162353515625,
      "learning_rate": 8.806736842105264e-05,
      "loss": 0.9197,
      "step": 34
    },
    {
      "epoch": 0.001226112697272775,
      "grad_norm": 6.343068599700928,
      "learning_rate": 8.753684210526316e-05,
      "loss": 0.9759,
      "step": 35
    },
    {
      "epoch": 0.0012611444886234259,
      "grad_norm": 7.611424446105957,
      "learning_rate": 8.700631578947369e-05,
      "loss": 0.9741,
      "step": 36
    },
    {
      "epoch": 0.0012961762799740765,
      "grad_norm": 6.170362949371338,
      "learning_rate": 8.647578947368421e-05,
      "loss": 1.2902,
      "step": 37
    },
    {
      "epoch": 0.0013312080713247271,
      "grad_norm": 6.117220401763916,
      "learning_rate": 8.594526315789473e-05,
      "loss": 0.7671,
      "step": 38
    },
    {
      "epoch": 0.001366239862675378,
      "grad_norm": 7.04514217376709,
      "learning_rate": 8.541473684210525e-05,
      "loss": 1.036,
      "step": 39
    },
    {
      "epoch": 0.0014012716540260286,
      "grad_norm": 9.130364418029785,
      "learning_rate": 8.488421052631578e-05,
      "loss": 1.1909,
      "step": 40
    },
    {
      "epoch": 0.0014363034453766794,
      "grad_norm": 4.37771463394165,
      "learning_rate": 8.435368421052631e-05,
      "loss": 0.5097,
      "step": 41
    },
    {
      "epoch": 0.00147133523672733,
      "grad_norm": 5.883099555969238,
      "learning_rate": 8.382315789473684e-05,
      "loss": 0.8581,
      "step": 42
    },
    {
      "epoch": 0.0015063670280779807,
      "grad_norm": 6.91945219039917,
      "learning_rate": 8.329263157894737e-05,
      "loss": 1.3828,
      "step": 43
    },
    {
      "epoch": 0.0015413988194286315,
      "grad_norm": 6.779603004455566,
      "learning_rate": 8.27621052631579e-05,
      "loss": 1.1814,
      "step": 44
    },
    {
      "epoch": 0.0015764306107792822,
      "grad_norm": 6.361364364624023,
      "learning_rate": 8.223157894736842e-05,
      "loss": 0.8581,
      "step": 45
    },
    {
      "epoch": 0.001611462402129933,
      "grad_norm": 5.481888294219971,
      "learning_rate": 8.170105263157894e-05,
      "loss": 0.7128,
      "step": 46
    },
    {
      "epoch": 0.0016464941934805836,
      "grad_norm": 6.276697158813477,
      "learning_rate": 8.117052631578946e-05,
      "loss": 1.0992,
      "step": 47
    },
    {
      "epoch": 0.0016815259848312343,
      "grad_norm": 6.994898319244385,
      "learning_rate": 8.064e-05,
      "loss": 0.8142,
      "step": 48
    },
    {
      "epoch": 0.001716557776181885,
      "grad_norm": 4.883788585662842,
      "learning_rate": 8.010947368421052e-05,
      "loss": 0.5797,
      "step": 49
    },
    {
      "epoch": 0.0017515895675325357,
      "grad_norm": 6.223549842834473,
      "learning_rate": 7.957894736842105e-05,
      "loss": 1.2047,
      "step": 50
    },
    {
      "epoch": 0.0017515895675325357,
      "eval_loss": 1.1290020942687988,
      "eval_runtime": 2061.5746,
      "eval_samples_per_second": 5.83,
      "eval_steps_per_second": 1.458,
      "step": 50
    },
    {
      "epoch": 0.0017866213588831866,
      "grad_norm": 5.159853458404541,
      "learning_rate": 7.904842105263158e-05,
      "loss": 1.027,
      "step": 51
    },
    {
      "epoch": 0.0018216531502338372,
      "grad_norm": 5.4184088706970215,
      "learning_rate": 7.85178947368421e-05,
      "loss": 1.286,
      "step": 52
    },
    {
      "epoch": 0.0018566849415844878,
      "grad_norm": 6.966073989868164,
      "learning_rate": 7.798736842105263e-05,
      "loss": 1.4248,
      "step": 53
    },
    {
      "epoch": 0.0018917167329351387,
      "grad_norm": 6.474468231201172,
      "learning_rate": 7.745684210526315e-05,
      "loss": 1.5506,
      "step": 54
    },
    {
      "epoch": 0.0019267485242857893,
      "grad_norm": 6.7876811027526855,
      "learning_rate": 7.692631578947369e-05,
      "loss": 1.6668,
      "step": 55
    },
    {
      "epoch": 0.00196178031563644,
      "grad_norm": 6.088242530822754,
      "learning_rate": 7.639578947368421e-05,
      "loss": 1.4712,
      "step": 56
    },
    {
      "epoch": 0.0019968121069870908,
      "grad_norm": 4.957919597625732,
      "learning_rate": 7.586526315789473e-05,
      "loss": 1.2942,
      "step": 57
    },
    {
      "epoch": 0.0020318438983377416,
      "grad_norm": 5.453049659729004,
      "learning_rate": 7.533473684210526e-05,
      "loss": 1.1897,
      "step": 58
    },
    {
      "epoch": 0.002066875689688392,
      "grad_norm": 8.739358901977539,
      "learning_rate": 7.480421052631578e-05,
      "loss": 1.2799,
      "step": 59
    },
    {
      "epoch": 0.002101907481039043,
      "grad_norm": 8.237629890441895,
      "learning_rate": 7.427368421052632e-05,
      "loss": 1.5075,
      "step": 60
    },
    {
      "epoch": 0.0021369392723896937,
      "grad_norm": 6.064999580383301,
      "learning_rate": 7.374315789473685e-05,
      "loss": 1.4072,
      "step": 61
    },
    {
      "epoch": 0.0021719710637403446,
      "grad_norm": 7.591026306152344,
      "learning_rate": 7.321263157894737e-05,
      "loss": 1.4841,
      "step": 62
    },
    {
      "epoch": 0.002207002855090995,
      "grad_norm": 6.479332447052002,
      "learning_rate": 7.26821052631579e-05,
      "loss": 1.3464,
      "step": 63
    },
    {
      "epoch": 0.002242034646441646,
      "grad_norm": 7.660516738891602,
      "learning_rate": 7.215157894736842e-05,
      "loss": 1.3947,
      "step": 64
    },
    {
      "epoch": 0.0022770664377922967,
      "grad_norm": 6.516019821166992,
      "learning_rate": 7.162105263157894e-05,
      "loss": 0.9144,
      "step": 65
    },
    {
      "epoch": 0.002312098229142947,
      "grad_norm": 8.34161376953125,
      "learning_rate": 7.109052631578947e-05,
      "loss": 1.6912,
      "step": 66
    },
    {
      "epoch": 0.002347130020493598,
      "grad_norm": 10.69320297241211,
      "learning_rate": 7.055999999999999e-05,
      "loss": 1.9,
      "step": 67
    },
    {
      "epoch": 0.0023821618118442488,
      "grad_norm": 10.620810508728027,
      "learning_rate": 7.002947368421052e-05,
      "loss": 1.0454,
      "step": 68
    },
    {
      "epoch": 0.002417193603194899,
      "grad_norm": 7.011707305908203,
      "learning_rate": 6.949894736842105e-05,
      "loss": 0.8123,
      "step": 69
    },
    {
      "epoch": 0.00245222539454555,
      "grad_norm": 7.232404708862305,
      "learning_rate": 6.896842105263158e-05,
      "loss": 1.4846,
      "step": 70
    },
    {
      "epoch": 0.002487257185896201,
      "grad_norm": 7.875359535217285,
      "learning_rate": 6.843789473684211e-05,
      "loss": 1.2222,
      "step": 71
    },
    {
      "epoch": 0.0025222889772468517,
      "grad_norm": 6.384848594665527,
      "learning_rate": 6.790736842105263e-05,
      "loss": 1.0321,
      "step": 72
    },
    {
      "epoch": 0.002557320768597502,
      "grad_norm": 6.461179733276367,
      "learning_rate": 6.737684210526315e-05,
      "loss": 0.9518,
      "step": 73
    },
    {
      "epoch": 0.002592352559948153,
      "grad_norm": 7.334846019744873,
      "learning_rate": 6.684631578947368e-05,
      "loss": 1.2871,
      "step": 74
    },
    {
      "epoch": 0.002627384351298804,
      "grad_norm": 5.616960048675537,
      "learning_rate": 6.631578947368421e-05,
      "loss": 0.9077,
      "step": 75
    },
    {
      "epoch": 0.0026624161426494542,
      "grad_norm": 4.717360496520996,
      "learning_rate": 6.578526315789473e-05,
      "loss": 0.6344,
      "step": 76
    },
    {
      "epoch": 0.002697447934000105,
      "grad_norm": 4.63266658782959,
      "learning_rate": 6.525473684210526e-05,
      "loss": 0.7442,
      "step": 77
    },
    {
      "epoch": 0.002732479725350756,
      "grad_norm": 6.365645408630371,
      "learning_rate": 6.47242105263158e-05,
      "loss": 0.9088,
      "step": 78
    },
    {
      "epoch": 0.0027675115167014063,
      "grad_norm": 5.476707935333252,
      "learning_rate": 6.419368421052632e-05,
      "loss": 0.7585,
      "step": 79
    },
    {
      "epoch": 0.002802543308052057,
      "grad_norm": 6.600567817687988,
      "learning_rate": 6.366315789473684e-05,
      "loss": 1.1681,
      "step": 80
    },
    {
      "epoch": 0.002837575099402708,
      "grad_norm": 4.197580337524414,
      "learning_rate": 6.313263157894736e-05,
      "loss": 0.5965,
      "step": 81
    },
    {
      "epoch": 0.002872606890753359,
      "grad_norm": 5.1230788230896,
      "learning_rate": 6.26021052631579e-05,
      "loss": 0.978,
      "step": 82
    },
    {
      "epoch": 0.0029076386821040093,
      "grad_norm": 5.09016752243042,
      "learning_rate": 6.207157894736842e-05,
      "loss": 0.663,
      "step": 83
    },
    {
      "epoch": 0.00294267047345466,
      "grad_norm": 6.350630283355713,
      "learning_rate": 6.154105263157894e-05,
      "loss": 0.9641,
      "step": 84
    },
    {
      "epoch": 0.002977702264805311,
      "grad_norm": 6.287975788116455,
      "learning_rate": 6.1010526315789474e-05,
      "loss": 1.2104,
      "step": 85
    },
    {
      "epoch": 0.0030127340561559614,
      "grad_norm": 5.212226867675781,
      "learning_rate": 6.048e-05,
      "loss": 0.9887,
      "step": 86
    },
    {
      "epoch": 0.0030477658475066122,
      "grad_norm": 6.213005542755127,
      "learning_rate": 5.994947368421052e-05,
      "loss": 0.6417,
      "step": 87
    },
    {
      "epoch": 0.003082797638857263,
      "grad_norm": 6.072321891784668,
      "learning_rate": 5.941894736842104e-05,
      "loss": 0.8963,
      "step": 88
    },
    {
      "epoch": 0.0031178294302079135,
      "grad_norm": 4.825734615325928,
      "learning_rate": 5.888842105263158e-05,
      "loss": 0.742,
      "step": 89
    },
    {
      "epoch": 0.0031528612215585643,
      "grad_norm": 5.300004959106445,
      "learning_rate": 5.835789473684211e-05,
      "loss": 0.7844,
      "step": 90
    },
    {
      "epoch": 0.003187893012909215,
      "grad_norm": 8.157235145568848,
      "learning_rate": 5.782736842105263e-05,
      "loss": 1.2107,
      "step": 91
    },
    {
      "epoch": 0.003222924804259866,
      "grad_norm": 4.026047706604004,
      "learning_rate": 5.7296842105263154e-05,
      "loss": 0.503,
      "step": 92
    },
    {
      "epoch": 0.0032579565956105164,
      "grad_norm": 5.493070125579834,
      "learning_rate": 5.676631578947368e-05,
      "loss": 0.8302,
      "step": 93
    },
    {
      "epoch": 0.0032929883869611673,
      "grad_norm": 5.728854179382324,
      "learning_rate": 5.623578947368421e-05,
      "loss": 0.7791,
      "step": 94
    },
    {
      "epoch": 0.003328020178311818,
      "grad_norm": 5.390102863311768,
      "learning_rate": 5.570526315789474e-05,
      "loss": 0.9903,
      "step": 95
    },
    {
      "epoch": 0.0033630519696624685,
      "grad_norm": 6.115237712860107,
      "learning_rate": 5.5174736842105266e-05,
      "loss": 0.7596,
      "step": 96
    },
    {
      "epoch": 0.0033980837610131194,
      "grad_norm": 5.043397426605225,
      "learning_rate": 5.464421052631579e-05,
      "loss": 0.7616,
      "step": 97
    },
    {
      "epoch": 0.00343311555236377,
      "grad_norm": 5.883362293243408,
      "learning_rate": 5.411368421052631e-05,
      "loss": 0.8057,
      "step": 98
    },
    {
      "epoch": 0.0034681473437144206,
      "grad_norm": 6.632864952087402,
      "learning_rate": 5.358315789473684e-05,
      "loss": 1.0609,
      "step": 99
    },
    {
      "epoch": 0.0035031791350650715,
      "grad_norm": 4.529953956604004,
      "learning_rate": 5.3052631578947364e-05,
      "loss": 0.6286,
      "step": 100
    },
    {
      "epoch": 0.0035031791350650715,
      "eval_loss": 1.029884696006775,
      "eval_runtime": 2061.1807,
      "eval_samples_per_second": 5.831,
      "eval_steps_per_second": 1.458,
      "step": 100
    },
    {
      "epoch": 0.0035382109264157223,
      "grad_norm": 4.1783599853515625,
      "learning_rate": 5.252210526315789e-05,
      "loss": 0.9496,
      "step": 101
    },
    {
      "epoch": 0.003573242717766373,
      "grad_norm": 5.620020866394043,
      "learning_rate": 5.199157894736842e-05,
      "loss": 1.3367,
      "step": 102
    },
    {
      "epoch": 0.0036082745091170236,
      "grad_norm": 5.590014457702637,
      "learning_rate": 5.1461052631578946e-05,
      "loss": 1.0153,
      "step": 103
    },
    {
      "epoch": 0.0036433063004676744,
      "grad_norm": 5.261481285095215,
      "learning_rate": 5.0930526315789476e-05,
      "loss": 0.9291,
      "step": 104
    },
    {
      "epoch": 0.0036783380918183253,
      "grad_norm": 6.13337516784668,
      "learning_rate": 5.04e-05,
      "loss": 1.0299,
      "step": 105
    },
    {
      "epoch": 0.0037133698831689757,
      "grad_norm": 4.944069862365723,
      "learning_rate": 4.986947368421052e-05,
      "loss": 0.9319,
      "step": 106
    },
    {
      "epoch": 0.0037484016745196265,
      "grad_norm": 5.500107765197754,
      "learning_rate": 4.933894736842105e-05,
      "loss": 0.9859,
      "step": 107
    },
    {
      "epoch": 0.0037834334658702774,
      "grad_norm": 4.677202224731445,
      "learning_rate": 4.880842105263158e-05,
      "loss": 0.9123,
      "step": 108
    },
    {
      "epoch": 0.003818465257220928,
      "grad_norm": 6.256136417388916,
      "learning_rate": 4.8277894736842103e-05,
      "loss": 1.1807,
      "step": 109
    },
    {
      "epoch": 0.0038534970485715786,
      "grad_norm": 8.950167655944824,
      "learning_rate": 4.7747368421052626e-05,
      "loss": 1.1984,
      "step": 110
    },
    {
      "epoch": 0.0038885288399222295,
      "grad_norm": 7.130908966064453,
      "learning_rate": 4.7216842105263156e-05,
      "loss": 1.4225,
      "step": 111
    },
    {
      "epoch": 0.00392356063127288,
      "grad_norm": 7.4759931564331055,
      "learning_rate": 4.6686315789473686e-05,
      "loss": 1.3882,
      "step": 112
    },
    {
      "epoch": 0.003958592422623531,
      "grad_norm": 10.55746078491211,
      "learning_rate": 4.615578947368421e-05,
      "loss": 1.5642,
      "step": 113
    },
    {
      "epoch": 0.0039936242139741816,
      "grad_norm": 7.839519500732422,
      "learning_rate": 4.562526315789474e-05,
      "loss": 1.2803,
      "step": 114
    },
    {
      "epoch": 0.004028656005324832,
      "grad_norm": 7.067453861236572,
      "learning_rate": 4.509473684210526e-05,
      "loss": 1.0843,
      "step": 115
    },
    {
      "epoch": 0.004063687796675483,
      "grad_norm": 7.410902500152588,
      "learning_rate": 4.4564210526315784e-05,
      "loss": 1.1034,
      "step": 116
    },
    {
      "epoch": 0.004098719588026134,
      "grad_norm": 12.82276439666748,
      "learning_rate": 4.403368421052632e-05,
      "loss": 1.9388,
      "step": 117
    },
    {
      "epoch": 0.004133751379376784,
      "grad_norm": 11.6984281539917,
      "learning_rate": 4.350315789473684e-05,
      "loss": 2.2554,
      "step": 118
    },
    {
      "epoch": 0.004168783170727435,
      "grad_norm": 9.550557136535645,
      "learning_rate": 4.2972631578947366e-05,
      "loss": 1.2014,
      "step": 119
    },
    {
      "epoch": 0.004203814962078086,
      "grad_norm": 6.161247730255127,
      "learning_rate": 4.244210526315789e-05,
      "loss": 0.8151,
      "step": 120
    },
    {
      "epoch": 0.004238846753428736,
      "grad_norm": 7.186214447021484,
      "learning_rate": 4.191157894736842e-05,
      "loss": 0.9695,
      "step": 121
    },
    {
      "epoch": 0.0042738785447793875,
      "grad_norm": 6.048009395599365,
      "learning_rate": 4.138105263157895e-05,
      "loss": 0.9508,
      "step": 122
    },
    {
      "epoch": 0.004308910336130038,
      "grad_norm": 5.062438011169434,
      "learning_rate": 4.085052631578947e-05,
      "loss": 0.7397,
      "step": 123
    },
    {
      "epoch": 0.004343942127480689,
      "grad_norm": 5.460752010345459,
      "learning_rate": 4.032e-05,
      "loss": 0.8562,
      "step": 124
    },
    {
      "epoch": 0.0043789739188313396,
      "grad_norm": 4.651021480560303,
      "learning_rate": 3.978947368421052e-05,
      "loss": 0.7181,
      "step": 125
    },
    {
      "epoch": 0.00441400571018199,
      "grad_norm": 7.1053667068481445,
      "learning_rate": 3.925894736842105e-05,
      "loss": 0.9492,
      "step": 126
    },
    {
      "epoch": 0.004449037501532641,
      "grad_norm": 4.558400630950928,
      "learning_rate": 3.8728421052631575e-05,
      "loss": 0.8316,
      "step": 127
    },
    {
      "epoch": 0.004484069292883292,
      "grad_norm": 4.557405948638916,
      "learning_rate": 3.8197894736842105e-05,
      "loss": 0.6187,
      "step": 128
    },
    {
      "epoch": 0.004519101084233942,
      "grad_norm": 6.3636298179626465,
      "learning_rate": 3.766736842105263e-05,
      "loss": 0.7996,
      "step": 129
    },
    {
      "epoch": 0.004554132875584593,
      "grad_norm": 4.011357307434082,
      "learning_rate": 3.713684210526316e-05,
      "loss": 0.7043,
      "step": 130
    },
    {
      "epoch": 0.004589164666935244,
      "grad_norm": 5.260727405548096,
      "learning_rate": 3.660631578947369e-05,
      "loss": 0.6685,
      "step": 131
    },
    {
      "epoch": 0.004624196458285894,
      "grad_norm": 5.384383678436279,
      "learning_rate": 3.607578947368421e-05,
      "loss": 0.8229,
      "step": 132
    },
    {
      "epoch": 0.0046592282496365454,
      "grad_norm": 5.44893217086792,
      "learning_rate": 3.554526315789473e-05,
      "loss": 0.6983,
      "step": 133
    },
    {
      "epoch": 0.004694260040987196,
      "grad_norm": 5.031147003173828,
      "learning_rate": 3.501473684210526e-05,
      "loss": 0.6835,
      "step": 134
    },
    {
      "epoch": 0.004729291832337846,
      "grad_norm": 5.930494785308838,
      "learning_rate": 3.448421052631579e-05,
      "loss": 0.9263,
      "step": 135
    },
    {
      "epoch": 0.0047643236236884975,
      "grad_norm": 5.080932140350342,
      "learning_rate": 3.3953684210526315e-05,
      "loss": 0.7292,
      "step": 136
    },
    {
      "epoch": 0.004799355415039148,
      "grad_norm": 5.154940128326416,
      "learning_rate": 3.342315789473684e-05,
      "loss": 0.6465,
      "step": 137
    },
    {
      "epoch": 0.004834387206389798,
      "grad_norm": 5.2444939613342285,
      "learning_rate": 3.289263157894737e-05,
      "loss": 0.7776,
      "step": 138
    },
    {
      "epoch": 0.00486941899774045,
      "grad_norm": 5.0661211013793945,
      "learning_rate": 3.23621052631579e-05,
      "loss": 0.7375,
      "step": 139
    },
    {
      "epoch": 0.0049044507890911,
      "grad_norm": 4.658995628356934,
      "learning_rate": 3.183157894736842e-05,
      "loss": 0.5983,
      "step": 140
    },
    {
      "epoch": 0.0049394825804417505,
      "grad_norm": 5.3926920890808105,
      "learning_rate": 3.130105263157895e-05,
      "loss": 0.9688,
      "step": 141
    },
    {
      "epoch": 0.004974514371792402,
      "grad_norm": 6.5495805740356445,
      "learning_rate": 3.077052631578947e-05,
      "loss": 0.912,
      "step": 142
    },
    {
      "epoch": 0.005009546163143052,
      "grad_norm": 6.6911702156066895,
      "learning_rate": 3.024e-05,
      "loss": 1.257,
      "step": 143
    },
    {
      "epoch": 0.0050445779544937034,
      "grad_norm": 5.760495662689209,
      "learning_rate": 2.970947368421052e-05,
      "loss": 0.8503,
      "step": 144
    },
    {
      "epoch": 0.005079609745844354,
      "grad_norm": 3.4422309398651123,
      "learning_rate": 2.9178947368421054e-05,
      "loss": 0.4197,
      "step": 145
    },
    {
      "epoch": 0.005114641537195004,
      "grad_norm": 4.122685432434082,
      "learning_rate": 2.8648421052631577e-05,
      "loss": 0.6266,
      "step": 146
    },
    {
      "epoch": 0.0051496733285456555,
      "grad_norm": 4.601315975189209,
      "learning_rate": 2.8117894736842103e-05,
      "loss": 0.5214,
      "step": 147
    },
    {
      "epoch": 0.005184705119896306,
      "grad_norm": 4.702164173126221,
      "learning_rate": 2.7587368421052633e-05,
      "loss": 0.6284,
      "step": 148
    },
    {
      "epoch": 0.005219736911246956,
      "grad_norm": 5.033868789672852,
      "learning_rate": 2.7056842105263156e-05,
      "loss": 0.6065,
      "step": 149
    },
    {
      "epoch": 0.005254768702597608,
      "grad_norm": 5.247815132141113,
      "learning_rate": 2.6526315789473682e-05,
      "loss": 0.887,
      "step": 150
    },
    {
      "epoch": 0.005254768702597608,
      "eval_loss": 0.9501329064369202,
      "eval_runtime": 2058.4097,
      "eval_samples_per_second": 5.839,
      "eval_steps_per_second": 1.46,
      "step": 150
    },
    {
      "epoch": 0.005289800493948258,
      "grad_norm": 4.7826056480407715,
      "learning_rate": 2.599578947368421e-05,
      "loss": 0.8471,
      "step": 151
    },
    {
      "epoch": 0.0053248322852989085,
      "grad_norm": 4.294804573059082,
      "learning_rate": 2.5465263157894738e-05,
      "loss": 0.8585,
      "step": 152
    },
    {
      "epoch": 0.00535986407664956,
      "grad_norm": 5.511264324188232,
      "learning_rate": 2.493473684210526e-05,
      "loss": 1.0528,
      "step": 153
    },
    {
      "epoch": 0.00539489586800021,
      "grad_norm": 6.201894760131836,
      "learning_rate": 2.440421052631579e-05,
      "loss": 1.1332,
      "step": 154
    },
    {
      "epoch": 0.0054299276593508606,
      "grad_norm": 4.838656425476074,
      "learning_rate": 2.3873684210526313e-05,
      "loss": 0.8937,
      "step": 155
    },
    {
      "epoch": 0.005464959450701512,
      "grad_norm": 5.65955924987793,
      "learning_rate": 2.3343157894736843e-05,
      "loss": 1.0686,
      "step": 156
    },
    {
      "epoch": 0.005499991242052162,
      "grad_norm": 6.030940055847168,
      "learning_rate": 2.281263157894737e-05,
      "loss": 1.0709,
      "step": 157
    },
    {
      "epoch": 0.005535023033402813,
      "grad_norm": 5.607932090759277,
      "learning_rate": 2.2282105263157892e-05,
      "loss": 1.0595,
      "step": 158
    },
    {
      "epoch": 0.005570054824753464,
      "grad_norm": 4.631815433502197,
      "learning_rate": 2.175157894736842e-05,
      "loss": 0.7808,
      "step": 159
    },
    {
      "epoch": 0.005605086616104114,
      "grad_norm": 9.391894340515137,
      "learning_rate": 2.1221052631578944e-05,
      "loss": 1.6867,
      "step": 160
    },
    {
      "epoch": 0.005640118407454766,
      "grad_norm": 6.0099616050720215,
      "learning_rate": 2.0690526315789474e-05,
      "loss": 0.9337,
      "step": 161
    },
    {
      "epoch": 0.005675150198805416,
      "grad_norm": 7.833316802978516,
      "learning_rate": 2.016e-05,
      "loss": 1.6899,
      "step": 162
    },
    {
      "epoch": 0.0057101819901560665,
      "grad_norm": 7.131737232208252,
      "learning_rate": 1.9629473684210526e-05,
      "loss": 1.3336,
      "step": 163
    },
    {
      "epoch": 0.005745213781506718,
      "grad_norm": 7.738104343414307,
      "learning_rate": 1.9098947368421053e-05,
      "loss": 1.3881,
      "step": 164
    },
    {
      "epoch": 0.005780245572857368,
      "grad_norm": 7.788103103637695,
      "learning_rate": 1.856842105263158e-05,
      "loss": 1.0198,
      "step": 165
    },
    {
      "epoch": 0.0058152773642080186,
      "grad_norm": 7.89583158493042,
      "learning_rate": 1.8037894736842105e-05,
      "loss": 1.2767,
      "step": 166
    },
    {
      "epoch": 0.00585030915555867,
      "grad_norm": 11.539169311523438,
      "learning_rate": 1.750736842105263e-05,
      "loss": 1.9096,
      "step": 167
    },
    {
      "epoch": 0.00588534094690932,
      "grad_norm": 7.769778251647949,
      "learning_rate": 1.6976842105263157e-05,
      "loss": 1.4205,
      "step": 168
    },
    {
      "epoch": 0.005920372738259971,
      "grad_norm": 4.522551536560059,
      "learning_rate": 1.6446315789473684e-05,
      "loss": 0.6861,
      "step": 169
    },
    {
      "epoch": 0.005955404529610622,
      "grad_norm": 4.317171096801758,
      "learning_rate": 1.591578947368421e-05,
      "loss": 0.6416,
      "step": 170
    },
    {
      "epoch": 0.005990436320961272,
      "grad_norm": 5.301867485046387,
      "learning_rate": 1.5385263157894736e-05,
      "loss": 0.8472,
      "step": 171
    },
    {
      "epoch": 0.006025468112311923,
      "grad_norm": 3.946662425994873,
      "learning_rate": 1.485473684210526e-05,
      "loss": 0.505,
      "step": 172
    },
    {
      "epoch": 0.006060499903662574,
      "grad_norm": 5.64309024810791,
      "learning_rate": 1.4324210526315789e-05,
      "loss": 0.9617,
      "step": 173
    },
    {
      "epoch": 0.0060955316950132244,
      "grad_norm": 6.693427085876465,
      "learning_rate": 1.3793684210526316e-05,
      "loss": 0.8974,
      "step": 174
    },
    {
      "epoch": 0.006130563486363875,
      "grad_norm": 4.707422733306885,
      "learning_rate": 1.3263157894736841e-05,
      "loss": 0.7757,
      "step": 175
    },
    {
      "epoch": 0.006165595277714526,
      "grad_norm": 4.7554826736450195,
      "learning_rate": 1.2732631578947369e-05,
      "loss": 0.6396,
      "step": 176
    },
    {
      "epoch": 0.0062006270690651765,
      "grad_norm": 4.768486499786377,
      "learning_rate": 1.2202105263157895e-05,
      "loss": 0.6989,
      "step": 177
    },
    {
      "epoch": 0.006235658860415827,
      "grad_norm": 4.941887378692627,
      "learning_rate": 1.1671578947368421e-05,
      "loss": 0.6704,
      "step": 178
    },
    {
      "epoch": 0.006270690651766478,
      "grad_norm": 5.827295780181885,
      "learning_rate": 1.1141052631578946e-05,
      "loss": 0.9484,
      "step": 179
    },
    {
      "epoch": 0.006305722443117129,
      "grad_norm": 5.031542778015137,
      "learning_rate": 1.0610526315789472e-05,
      "loss": 0.7449,
      "step": 180
    },
    {
      "epoch": 0.00634075423446778,
      "grad_norm": 5.917794704437256,
      "learning_rate": 1.008e-05,
      "loss": 1.0857,
      "step": 181
    },
    {
      "epoch": 0.00637578602581843,
      "grad_norm": 4.596070766448975,
      "learning_rate": 9.549473684210526e-06,
      "loss": 0.6569,
      "step": 182
    },
    {
      "epoch": 0.006410817817169081,
      "grad_norm": 4.4000325202941895,
      "learning_rate": 9.018947368421052e-06,
      "loss": 0.6524,
      "step": 183
    },
    {
      "epoch": 0.006445849608519732,
      "grad_norm": 7.547094345092773,
      "learning_rate": 8.488421052631579e-06,
      "loss": 1.0409,
      "step": 184
    },
    {
      "epoch": 0.0064808813998703824,
      "grad_norm": 5.421209335327148,
      "learning_rate": 7.957894736842105e-06,
      "loss": 0.8219,
      "step": 185
    },
    {
      "epoch": 0.006515913191221033,
      "grad_norm": 6.741072177886963,
      "learning_rate": 7.42736842105263e-06,
      "loss": 0.8836,
      "step": 186
    },
    {
      "epoch": 0.006550944982571684,
      "grad_norm": 6.431979656219482,
      "learning_rate": 6.896842105263158e-06,
      "loss": 0.9018,
      "step": 187
    },
    {
      "epoch": 0.0065859767739223345,
      "grad_norm": 4.790152549743652,
      "learning_rate": 6.3663157894736845e-06,
      "loss": 0.5921,
      "step": 188
    },
    {
      "epoch": 0.006621008565272985,
      "grad_norm": 5.673473358154297,
      "learning_rate": 5.835789473684211e-06,
      "loss": 0.6023,
      "step": 189
    },
    {
      "epoch": 0.006656040356623636,
      "grad_norm": 4.413316249847412,
      "learning_rate": 5.305263157894736e-06,
      "loss": 0.6352,
      "step": 190
    },
    {
      "epoch": 0.006691072147974287,
      "grad_norm": 5.780127048492432,
      "learning_rate": 4.774736842105263e-06,
      "loss": 0.9693,
      "step": 191
    },
    {
      "epoch": 0.006726103939324937,
      "grad_norm": 4.126491069793701,
      "learning_rate": 4.244210526315789e-06,
      "loss": 0.5177,
      "step": 192
    },
    {
      "epoch": 0.006761135730675588,
      "grad_norm": 8.283818244934082,
      "learning_rate": 3.713684210526315e-06,
      "loss": 0.874,
      "step": 193
    },
    {
      "epoch": 0.006796167522026239,
      "grad_norm": 5.534949779510498,
      "learning_rate": 3.1831578947368422e-06,
      "loss": 0.8445,
      "step": 194
    },
    {
      "epoch": 0.006831199313376889,
      "grad_norm": 6.650455951690674,
      "learning_rate": 2.652631578947368e-06,
      "loss": 1.0534,
      "step": 195
    },
    {
      "epoch": 0.00686623110472754,
      "grad_norm": 6.208735942840576,
      "learning_rate": 2.1221052631578947e-06,
      "loss": 0.7896,
      "step": 196
    },
    {
      "epoch": 0.006901262896078191,
      "grad_norm": 4.9419732093811035,
      "learning_rate": 1.5915789473684211e-06,
      "loss": 0.641,
      "step": 197
    },
    {
      "epoch": 0.006936294687428841,
      "grad_norm": 5.195125579833984,
      "learning_rate": 1.0610526315789473e-06,
      "loss": 0.7821,
      "step": 198
    },
    {
      "epoch": 0.0069713264787794925,
      "grad_norm": 5.101795196533203,
      "learning_rate": 5.305263157894737e-07,
      "loss": 0.5665,
      "step": 199
    },
    {
      "epoch": 0.007006358270130143,
      "grad_norm": 4.9422173500061035,
      "learning_rate": 0.0,
      "loss": 0.5634,
      "step": 200
    },
    {
      "epoch": 0.007006358270130143,
      "eval_loss": 0.907011091709137,
      "eval_runtime": 2060.6424,
      "eval_samples_per_second": 5.833,
      "eval_steps_per_second": 1.458,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0571696306965709e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}