|
{ |
|
"best_metric": 69.86920835599972, |
|
"best_model_checkpoint": "/root/turkic_qa/en_uzn_models/en_uzn_xlm_roberta_base_model/checkpoint-6900", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 6900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"step": 690, |
|
"train_exact_match": 23.076923076923077, |
|
"train_f1": 37.566079561862445, |
|
"train_runtime": 14.0841, |
|
"train_samples_per_second": 89.604, |
|
"train_steps_per_second": 3.266 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 116.39849853515625, |
|
"learning_rate": 5e-06, |
|
"loss": 4.5006, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 21.46875, |
|
"eval_f1": 34.27477969652227, |
|
"eval_runtime": 44.8553, |
|
"eval_samples_per_second": 90.335, |
|
"eval_steps_per_second": 3.233, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1380, |
|
"train_exact_match": 51.04895104895105, |
|
"train_f1": 64.28880003049197, |
|
"train_runtime": 14.6272, |
|
"train_samples_per_second": 87.03, |
|
"train_steps_per_second": 3.145 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 242.94326782226562, |
|
"learning_rate": 1e-05, |
|
"loss": 2.1628, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 45.46875, |
|
"eval_f1": 60.28283429101314, |
|
"eval_runtime": 46.3266, |
|
"eval_samples_per_second": 87.466, |
|
"eval_steps_per_second": 3.13, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2070, |
|
"train_exact_match": 57.142857142857146, |
|
"train_f1": 71.92623291529159, |
|
"train_runtime": 14.5466, |
|
"train_samples_per_second": 89.024, |
|
"train_steps_per_second": 3.231 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 18.747507095336914, |
|
"learning_rate": 8.750000000000001e-06, |
|
"loss": 1.4946, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 52.40625, |
|
"eval_f1": 67.09147147337285, |
|
"eval_runtime": 45.3354, |
|
"eval_samples_per_second": 89.378, |
|
"eval_steps_per_second": 3.198, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 2760, |
|
"train_exact_match": 64.13586413586414, |
|
"train_f1": 78.3731582090412, |
|
"train_runtime": 14.5689, |
|
"train_samples_per_second": 88.888, |
|
"train_steps_per_second": 3.226 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 269.3212890625, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 1.2186, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 53.96875, |
|
"eval_f1": 68.38825732557214, |
|
"eval_runtime": 45.3562, |
|
"eval_samples_per_second": 89.337, |
|
"eval_steps_per_second": 3.197, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 3450, |
|
"train_exact_match": 68.33166833166833, |
|
"train_f1": 82.36153774583077, |
|
"train_runtime": 13.878, |
|
"train_samples_per_second": 89.278, |
|
"train_steps_per_second": 3.243 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 259.8070068359375, |
|
"learning_rate": 6.25e-06, |
|
"loss": 1.0359, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 54.96875, |
|
"eval_f1": 69.02164576043303, |
|
"eval_runtime": 45.1308, |
|
"eval_samples_per_second": 89.783, |
|
"eval_steps_per_second": 3.213, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 4140, |
|
"train_exact_match": 71.92807192807193, |
|
"train_f1": 83.477467208777, |
|
"train_runtime": 14.0926, |
|
"train_samples_per_second": 89.55, |
|
"train_steps_per_second": 3.264 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 39.62363052368164, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8945, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 55.125, |
|
"eval_f1": 69.56052410993578, |
|
"eval_runtime": 44.9507, |
|
"eval_samples_per_second": 90.143, |
|
"eval_steps_per_second": 3.226, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 4830, |
|
"train_exact_match": 75.62437562437563, |
|
"train_f1": 86.36686612245309, |
|
"train_runtime": 14.0278, |
|
"train_samples_per_second": 88.823, |
|
"train_steps_per_second": 3.208 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 122.51107788085938, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.7916, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_exact_match": 55.4375, |
|
"eval_f1": 69.46738777882736, |
|
"eval_runtime": 45.1053, |
|
"eval_samples_per_second": 89.834, |
|
"eval_steps_per_second": 3.215, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 5520, |
|
"train_exact_match": 75.92407592407592, |
|
"train_f1": 86.72722282271204, |
|
"train_runtime": 14.2506, |
|
"train_samples_per_second": 89.75, |
|
"train_steps_per_second": 3.228 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 202.73541259765625, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.7119, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_exact_match": 56.0, |
|
"eval_f1": 69.69766614851221, |
|
"eval_runtime": 45.2302, |
|
"eval_samples_per_second": 89.586, |
|
"eval_steps_per_second": 3.206, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"step": 6210, |
|
"train_exact_match": 77.32267732267732, |
|
"train_f1": 88.72058931170982, |
|
"train_runtime": 14.7856, |
|
"train_samples_per_second": 87.653, |
|
"train_steps_per_second": 3.179 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 5.172795295715332, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.6445, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_exact_match": 55.90625, |
|
"eval_f1": 69.57114295943043, |
|
"eval_runtime": 45.9508, |
|
"eval_samples_per_second": 88.181, |
|
"eval_steps_per_second": 3.156, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 6900, |
|
"train_exact_match": 80.81918081918081, |
|
"train_f1": 90.37663245353303, |
|
"train_runtime": 14.2722, |
|
"train_samples_per_second": 88.634, |
|
"train_steps_per_second": 3.223 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 6.113234996795654, |
|
"learning_rate": 0.0, |
|
"loss": 0.6132, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_exact_match": 56.09375, |
|
"eval_f1": 69.86920835599972, |
|
"eval_runtime": 44.9954, |
|
"eval_samples_per_second": 90.054, |
|
"eval_steps_per_second": 3.223, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 6900, |
|
"total_flos": 3.780898745780736e+16, |
|
"train_loss": 1.4068151059358016, |
|
"train_runtime": 4389.3737, |
|
"train_samples_per_second": 43.954, |
|
"train_steps_per_second": 1.572 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6900, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 3.780898745780736e+16, |
|
"train_batch_size": 28, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|