|
{ |
|
"best_metric": 0.930276087973795, |
|
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2016", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 5040, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9920634920634921, |
|
"grad_norm": 0.06281786412000656, |
|
"learning_rate": 4.503968253968254e-05, |
|
"loss": 0.0189, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9983953339100828, |
|
"eval_f1": 0.9040358744394619, |
|
"eval_loss": 0.00518822530284524, |
|
"eval_precision": 0.8712186689714779, |
|
"eval_recall": 0.9394221808014911, |
|
"eval_runtime": 13.2131, |
|
"eval_samples_per_second": 525.689, |
|
"eval_steps_per_second": 65.768, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.9841269841269842, |
|
"grad_norm": 0.20689290761947632, |
|
"learning_rate": 4.007936507936508e-05, |
|
"loss": 0.0047, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9986883598917199, |
|
"eval_f1": 0.9244402985074627, |
|
"eval_loss": 0.004834321793168783, |
|
"eval_precision": 0.9253034547152195, |
|
"eval_recall": 0.923578751164958, |
|
"eval_runtime": 13.2434, |
|
"eval_samples_per_second": 524.487, |
|
"eval_steps_per_second": 65.618, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 2.9761904761904763, |
|
"grad_norm": 0.02256501279771328, |
|
"learning_rate": 3.511904761904762e-05, |
|
"loss": 0.0027, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9986255686099406, |
|
"eval_f1": 0.9239384041063929, |
|
"eval_loss": 0.005881821736693382, |
|
"eval_precision": 0.9252336448598131, |
|
"eval_recall": 0.9226467847157502, |
|
"eval_runtime": 13.1715, |
|
"eval_samples_per_second": 527.351, |
|
"eval_steps_per_second": 65.976, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 3.9682539682539684, |
|
"grad_norm": 0.0326182059943676, |
|
"learning_rate": 3.0158730158730158e-05, |
|
"loss": 0.0015, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9987162671280663, |
|
"eval_f1": 0.930276087973795, |
|
"eval_loss": 0.0064844791777431965, |
|
"eval_precision": 0.9342105263157895, |
|
"eval_recall": 0.9263746505125815, |
|
"eval_runtime": 13.5939, |
|
"eval_samples_per_second": 510.963, |
|
"eval_steps_per_second": 63.926, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 4.9603174603174605, |
|
"grad_norm": 0.1600140929222107, |
|
"learning_rate": 2.5198412698412697e-05, |
|
"loss": 0.0011, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9986116149917673, |
|
"eval_f1": 0.923076923076923, |
|
"eval_loss": 0.007346163038164377, |
|
"eval_precision": 0.9072907290729073, |
|
"eval_recall": 0.9394221808014911, |
|
"eval_runtime": 13.3438, |
|
"eval_samples_per_second": 520.543, |
|
"eval_steps_per_second": 65.124, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 5.9523809523809526, |
|
"grad_norm": 0.09641193598508835, |
|
"learning_rate": 2.023809523809524e-05, |
|
"loss": 0.0005, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.998444171573689, |
|
"eval_f1": 0.9204281060958585, |
|
"eval_loss": 0.009004838764667511, |
|
"eval_precision": 0.9191449814126395, |
|
"eval_recall": 0.9217148182665424, |
|
"eval_runtime": 13.3195, |
|
"eval_samples_per_second": 521.491, |
|
"eval_steps_per_second": 65.243, |
|
"step": 3024 |
|
}, |
|
{ |
|
"epoch": 6.944444444444445, |
|
"grad_norm": 0.029784763231873512, |
|
"learning_rate": 1.527777777777778e-05, |
|
"loss": 0.0007, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9985767309463344, |
|
"eval_f1": 0.9190432382704691, |
|
"eval_loss": 0.008385799825191498, |
|
"eval_precision": 0.9073569482288828, |
|
"eval_recall": 0.9310344827586207, |
|
"eval_runtime": 13.4485, |
|
"eval_samples_per_second": 516.487, |
|
"eval_steps_per_second": 64.617, |
|
"step": 3528 |
|
}, |
|
{ |
|
"epoch": 7.936507936507937, |
|
"grad_norm": 0.0010318646673113108, |
|
"learning_rate": 1.0317460317460318e-05, |
|
"loss": 0.0004, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9985558005190746, |
|
"eval_f1": 0.9213793103448276, |
|
"eval_loss": 0.008501913398504257, |
|
"eval_precision": 0.9092558983666061, |
|
"eval_recall": 0.9338303821062441, |
|
"eval_runtime": 13.3613, |
|
"eval_samples_per_second": 519.86, |
|
"eval_steps_per_second": 65.039, |
|
"step": 4032 |
|
}, |
|
{ |
|
"epoch": 8.928571428571429, |
|
"grad_norm": 0.0005677491426467896, |
|
"learning_rate": 5.357142857142857e-06, |
|
"loss": 0.0003, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9987023135098931, |
|
"eval_f1": 0.9270544783010157, |
|
"eval_loss": 0.008021777495741844, |
|
"eval_precision": 0.918572735590119, |
|
"eval_recall": 0.9356943150046598, |
|
"eval_runtime": 13.487, |
|
"eval_samples_per_second": 515.014, |
|
"eval_steps_per_second": 64.432, |
|
"step": 4536 |
|
}, |
|
{ |
|
"epoch": 9.920634920634921, |
|
"grad_norm": 0.0005077613168396056, |
|
"learning_rate": 3.9682539682539683e-07, |
|
"loss": 0.0002, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9986883598917199, |
|
"eval_f1": 0.9278445883441258, |
|
"eval_loss": 0.008253143168985844, |
|
"eval_precision": 0.921028466483012, |
|
"eval_recall": 0.934762348555452, |
|
"eval_runtime": 13.7257, |
|
"eval_samples_per_second": 506.057, |
|
"eval_steps_per_second": 63.312, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 5040, |
|
"total_flos": 1.394320679130096e+16, |
|
"train_loss": 0.0030765269683407886, |
|
"train_runtime": 1249.6681, |
|
"train_samples_per_second": 257.924, |
|
"train_steps_per_second": 4.033 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 5040, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.394320679130096e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|