|
{ |
|
"best_global_step": 11700, |
|
"best_metric": 0.5111122131347656, |
|
"best_model_checkpoint": "./output/gpt-medmentions/checkpoint-11700", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 29250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08547008547008547, |
|
"grad_norm": 7.1186957359313965, |
|
"learning_rate": 4.9153846153846157e-05, |
|
"loss": 0.9374, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.17094017094017094, |
|
"grad_norm": 2.6842756271362305, |
|
"learning_rate": 4.829914529914531e-05, |
|
"loss": 0.6708, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2564102564102564, |
|
"grad_norm": 4.874898433685303, |
|
"learning_rate": 4.7444444444444445e-05, |
|
"loss": 0.6453, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3418803418803419, |
|
"grad_norm": 5.255520820617676, |
|
"learning_rate": 4.658974358974359e-05, |
|
"loss": 0.6121, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.42735042735042733, |
|
"grad_norm": 2.5840187072753906, |
|
"learning_rate": 4.573504273504274e-05, |
|
"loss": 0.5936, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 5.246805667877197, |
|
"learning_rate": 4.488547008547009e-05, |
|
"loss": 0.5943, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5982905982905983, |
|
"grad_norm": 3.491377592086792, |
|
"learning_rate": 4.4032478632478637e-05, |
|
"loss": 0.5663, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6837606837606838, |
|
"grad_norm": 4.0636210441589355, |
|
"learning_rate": 4.317777777777778e-05, |
|
"loss": 0.5611, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 4.316960334777832, |
|
"learning_rate": 4.2323076923076925e-05, |
|
"loss": 0.5449, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8547008547008547, |
|
"grad_norm": 4.260451316833496, |
|
"learning_rate": 4.146837606837607e-05, |
|
"loss": 0.5481, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9401709401709402, |
|
"grad_norm": 5.917566299438477, |
|
"learning_rate": 4.061367521367522e-05, |
|
"loss": 0.5307, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8341444876242783, |
|
"eval_f1": 0.44006334125098967, |
|
"eval_loss": 0.5368949770927429, |
|
"eval_precision": 0.41286584459961373, |
|
"eval_recall": 0.47109679606713, |
|
"eval_runtime": 20.612, |
|
"eval_samples_per_second": 141.859, |
|
"eval_steps_per_second": 17.757, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.0256410256410255, |
|
"grad_norm": 3.265369415283203, |
|
"learning_rate": 3.975897435897436e-05, |
|
"loss": 0.4748, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 3.2452337741851807, |
|
"learning_rate": 3.890427350427351e-05, |
|
"loss": 0.3767, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.1965811965811965, |
|
"grad_norm": 2.8009512424468994, |
|
"learning_rate": 3.804957264957265e-05, |
|
"loss": 0.3761, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.282051282051282, |
|
"grad_norm": 5.858109951019287, |
|
"learning_rate": 3.71948717948718e-05, |
|
"loss": 0.3922, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.3675213675213675, |
|
"grad_norm": 1.7275584936141968, |
|
"learning_rate": 3.634017094017094e-05, |
|
"loss": 0.3677, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.452991452991453, |
|
"grad_norm": 4.6104044914245605, |
|
"learning_rate": 3.5485470085470085e-05, |
|
"loss": 0.3746, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"grad_norm": 2.6036839485168457, |
|
"learning_rate": 3.4630769230769236e-05, |
|
"loss": 0.3661, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.623931623931624, |
|
"grad_norm": 2.7406065464019775, |
|
"learning_rate": 3.3776068376068374e-05, |
|
"loss": 0.3712, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.7094017094017095, |
|
"grad_norm": 5.254650115966797, |
|
"learning_rate": 3.2921367521367525e-05, |
|
"loss": 0.3774, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.7948717948717947, |
|
"grad_norm": 2.274414539337158, |
|
"learning_rate": 3.206666666666667e-05, |
|
"loss": 0.3541, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.8803418803418803, |
|
"grad_norm": 3.5981504917144775, |
|
"learning_rate": 3.121196581196581e-05, |
|
"loss": 0.3694, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.965811965811966, |
|
"grad_norm": 3.6442151069641113, |
|
"learning_rate": 3.0357264957264958e-05, |
|
"loss": 0.3585, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8454107464662687, |
|
"eval_f1": 0.48178988326848243, |
|
"eval_loss": 0.5111122131347656, |
|
"eval_precision": 0.4453316069630269, |
|
"eval_recall": 0.5247499576199356, |
|
"eval_runtime": 20.6467, |
|
"eval_samples_per_second": 141.621, |
|
"eval_steps_per_second": 17.727, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.051282051282051, |
|
"grad_norm": 3.4292666912078857, |
|
"learning_rate": 2.9502564102564105e-05, |
|
"loss": 0.2559, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.1367521367521367, |
|
"grad_norm": 6.138054370880127, |
|
"learning_rate": 2.864786324786325e-05, |
|
"loss": 0.1817, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 3.659104347229004, |
|
"learning_rate": 2.7793162393162394e-05, |
|
"loss": 0.1759, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.3076923076923075, |
|
"grad_norm": 5.470319747924805, |
|
"learning_rate": 2.693846153846154e-05, |
|
"loss": 0.184, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.393162393162393, |
|
"grad_norm": 2.9865291118621826, |
|
"learning_rate": 2.6083760683760682e-05, |
|
"loss": 0.1761, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.4786324786324787, |
|
"grad_norm": 6.152403831481934, |
|
"learning_rate": 2.522905982905983e-05, |
|
"loss": 0.1798, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.564102564102564, |
|
"grad_norm": 4.3192338943481445, |
|
"learning_rate": 2.4374358974358977e-05, |
|
"loss": 0.1757, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.6495726495726495, |
|
"grad_norm": 3.217804193496704, |
|
"learning_rate": 2.3521367521367523e-05, |
|
"loss": 0.1738, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.735042735042735, |
|
"grad_norm": 3.670557737350464, |
|
"learning_rate": 2.2666666666666668e-05, |
|
"loss": 0.1861, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.8205128205128203, |
|
"grad_norm": 2.3006069660186768, |
|
"learning_rate": 2.1811965811965812e-05, |
|
"loss": 0.1705, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.905982905982906, |
|
"grad_norm": 1.9008346796035767, |
|
"learning_rate": 2.0958974358974358e-05, |
|
"loss": 0.1672, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.9914529914529915, |
|
"grad_norm": 1.8553671836853027, |
|
"learning_rate": 2.0104273504273506e-05, |
|
"loss": 0.1758, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8497137463068983, |
|
"eval_f1": 0.48074844074844075, |
|
"eval_loss": 0.6349462270736694, |
|
"eval_precision": 0.4718413320274241, |
|
"eval_recall": 0.4899983047974233, |
|
"eval_runtime": 20.6084, |
|
"eval_samples_per_second": 141.884, |
|
"eval_steps_per_second": 17.76, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 3.076923076923077, |
|
"grad_norm": 1.145456314086914, |
|
"learning_rate": 1.924957264957265e-05, |
|
"loss": 0.0777, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.1623931623931623, |
|
"grad_norm": 2.4131710529327393, |
|
"learning_rate": 1.8394871794871797e-05, |
|
"loss": 0.0769, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.247863247863248, |
|
"grad_norm": 2.5216588973999023, |
|
"learning_rate": 1.754017094017094e-05, |
|
"loss": 0.0779, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 2.811354160308838, |
|
"learning_rate": 1.6685470085470086e-05, |
|
"loss": 0.0804, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.4188034188034186, |
|
"grad_norm": 0.5833438634872437, |
|
"learning_rate": 1.5830769230769233e-05, |
|
"loss": 0.0772, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.5042735042735043, |
|
"grad_norm": 3.458584785461426, |
|
"learning_rate": 1.4976068376068378e-05, |
|
"loss": 0.0751, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 3.58974358974359, |
|
"grad_norm": 0.8929054141044617, |
|
"learning_rate": 1.4121367521367524e-05, |
|
"loss": 0.0761, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.6752136752136755, |
|
"grad_norm": 5.908766269683838, |
|
"learning_rate": 1.3268376068376068e-05, |
|
"loss": 0.0736, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.7606837606837606, |
|
"grad_norm": 1.0228583812713623, |
|
"learning_rate": 1.2413675213675214e-05, |
|
"loss": 0.0716, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.8461538461538463, |
|
"grad_norm": 0.626966118812561, |
|
"learning_rate": 1.1560683760683762e-05, |
|
"loss": 0.0718, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 3.931623931623932, |
|
"grad_norm": 1.9417258501052856, |
|
"learning_rate": 1.0705982905982906e-05, |
|
"loss": 0.0751, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8496892277892879, |
|
"eval_f1": 0.49008894029434047, |
|
"eval_loss": 0.9264360070228577, |
|
"eval_precision": 0.46282485875706214, |
|
"eval_recall": 0.5207662315646719, |
|
"eval_runtime": 20.6775, |
|
"eval_samples_per_second": 141.41, |
|
"eval_steps_per_second": 17.7, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 4.017094017094017, |
|
"grad_norm": 0.14389610290527344, |
|
"learning_rate": 9.851282051282052e-06, |
|
"loss": 0.0641, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 4.102564102564102, |
|
"grad_norm": 0.6148084402084351, |
|
"learning_rate": 8.996581196581196e-06, |
|
"loss": 0.0352, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 4.188034188034188, |
|
"grad_norm": 1.3859856128692627, |
|
"learning_rate": 8.141880341880342e-06, |
|
"loss": 0.0399, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 4.273504273504273, |
|
"grad_norm": 1.6096197366714478, |
|
"learning_rate": 7.287179487179488e-06, |
|
"loss": 0.0385, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 4.358974358974359, |
|
"grad_norm": 0.26734989881515503, |
|
"learning_rate": 6.432478632478633e-06, |
|
"loss": 0.0411, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 1.3472919464111328, |
|
"learning_rate": 5.577777777777778e-06, |
|
"loss": 0.0395, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 4.52991452991453, |
|
"grad_norm": 0.8369725942611694, |
|
"learning_rate": 4.723076923076923e-06, |
|
"loss": 0.0432, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 4.615384615384615, |
|
"grad_norm": 0.7225199341773987, |
|
"learning_rate": 3.87008547008547e-06, |
|
"loss": 0.0412, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 4.700854700854701, |
|
"grad_norm": 0.6592767834663391, |
|
"learning_rate": 3.0153846153846154e-06, |
|
"loss": 0.0407, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 4.786324786324786, |
|
"grad_norm": 0.9876635670661926, |
|
"learning_rate": 2.160683760683761e-06, |
|
"loss": 0.0372, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 4.871794871794872, |
|
"grad_norm": 0.7053186297416687, |
|
"learning_rate": 1.3059829059829061e-06, |
|
"loss": 0.0384, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 4.957264957264957, |
|
"grad_norm": 0.24596278369426727, |
|
"learning_rate": 4.52991452991453e-07, |
|
"loss": 0.0387, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8518223388213949, |
|
"eval_f1": 0.4960441433034446, |
|
"eval_loss": 1.0903491973876953, |
|
"eval_precision": 0.47575686823877344, |
|
"eval_recall": 0.5181386675707748, |
|
"eval_runtime": 20.8036, |
|
"eval_samples_per_second": 140.552, |
|
"eval_steps_per_second": 17.593, |
|
"step": 29250 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 29250, |
|
"total_flos": 5.182622875540416e+16, |
|
"train_loss": 0.2549698821499816, |
|
"train_runtime": 7489.8753, |
|
"train_samples_per_second": 15.62, |
|
"train_steps_per_second": 3.905 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 29250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.182622875540416e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|