{ "best_metric": 69.86920835599972, "best_model_checkpoint": "/root/turkic_qa/en_uzn_models/en_uzn_xlm_roberta_base_model/checkpoint-6900", "epoch": 10.0, "eval_steps": 500, "global_step": 6900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 690, "train_exact_match": 23.076923076923077, "train_f1": 37.566079561862445, "train_runtime": 14.0841, "train_samples_per_second": 89.604, "train_steps_per_second": 3.266 }, { "epoch": 1.0, "grad_norm": 116.39849853515625, "learning_rate": 5e-06, "loss": 4.5006, "step": 690 }, { "epoch": 1.0, "eval_exact_match": 21.46875, "eval_f1": 34.27477969652227, "eval_runtime": 44.8553, "eval_samples_per_second": 90.335, "eval_steps_per_second": 3.233, "step": 690 }, { "epoch": 2.0, "step": 1380, "train_exact_match": 51.04895104895105, "train_f1": 64.28880003049197, "train_runtime": 14.6272, "train_samples_per_second": 87.03, "train_steps_per_second": 3.145 }, { "epoch": 2.0, "grad_norm": 242.94326782226562, "learning_rate": 1e-05, "loss": 2.1628, "step": 1380 }, { "epoch": 2.0, "eval_exact_match": 45.46875, "eval_f1": 60.28283429101314, "eval_runtime": 46.3266, "eval_samples_per_second": 87.466, "eval_steps_per_second": 3.13, "step": 1380 }, { "epoch": 3.0, "step": 2070, "train_exact_match": 57.142857142857146, "train_f1": 71.92623291529159, "train_runtime": 14.5466, "train_samples_per_second": 89.024, "train_steps_per_second": 3.231 }, { "epoch": 3.0, "grad_norm": 18.747507095336914, "learning_rate": 8.750000000000001e-06, "loss": 1.4946, "step": 2070 }, { "epoch": 3.0, "eval_exact_match": 52.40625, "eval_f1": 67.09147147337285, "eval_runtime": 45.3354, "eval_samples_per_second": 89.378, "eval_steps_per_second": 3.198, "step": 2070 }, { "epoch": 4.0, "step": 2760, "train_exact_match": 64.13586413586414, "train_f1": 78.3731582090412, "train_runtime": 14.5689, "train_samples_per_second": 88.888, "train_steps_per_second": 3.226 }, { "epoch": 4.0, "grad_norm": 269.3212890625, "learning_rate": 7.500000000000001e-06, "loss": 1.2186, "step": 2760 }, { "epoch": 4.0, "eval_exact_match": 53.96875, "eval_f1": 68.38825732557214, "eval_runtime": 45.3562, "eval_samples_per_second": 89.337, "eval_steps_per_second": 3.197, "step": 2760 }, { "epoch": 5.0, "step": 3450, "train_exact_match": 68.33166833166833, "train_f1": 82.36153774583077, "train_runtime": 13.878, "train_samples_per_second": 89.278, "train_steps_per_second": 3.243 }, { "epoch": 5.0, "grad_norm": 259.8070068359375, "learning_rate": 6.25e-06, "loss": 1.0359, "step": 3450 }, { "epoch": 5.0, "eval_exact_match": 54.96875, "eval_f1": 69.02164576043303, "eval_runtime": 45.1308, "eval_samples_per_second": 89.783, "eval_steps_per_second": 3.213, "step": 3450 }, { "epoch": 6.0, "step": 4140, "train_exact_match": 71.92807192807193, "train_f1": 83.477467208777, "train_runtime": 14.0926, "train_samples_per_second": 89.55, "train_steps_per_second": 3.264 }, { "epoch": 6.0, "grad_norm": 39.62363052368164, "learning_rate": 5e-06, "loss": 0.8945, "step": 4140 }, { "epoch": 6.0, "eval_exact_match": 55.125, "eval_f1": 69.56052410993578, "eval_runtime": 44.9507, "eval_samples_per_second": 90.143, "eval_steps_per_second": 3.226, "step": 4140 }, { "epoch": 7.0, "step": 4830, "train_exact_match": 75.62437562437563, "train_f1": 86.36686612245309, "train_runtime": 14.0278, "train_samples_per_second": 88.823, "train_steps_per_second": 3.208 }, { "epoch": 7.0, "grad_norm": 122.51107788085938, "learning_rate": 3.7500000000000005e-06, "loss": 0.7916, "step": 4830 }, { "epoch": 7.0, "eval_exact_match": 55.4375, "eval_f1": 69.46738777882736, "eval_runtime": 45.1053, "eval_samples_per_second": 89.834, "eval_steps_per_second": 3.215, "step": 4830 }, { "epoch": 8.0, "step": 5520, "train_exact_match": 75.92407592407592, "train_f1": 86.72722282271204, "train_runtime": 14.2506, "train_samples_per_second": 89.75, "train_steps_per_second": 3.228 }, { "epoch": 8.0, "grad_norm": 202.73541259765625, "learning_rate": 2.5e-06, "loss": 0.7119, "step": 5520 }, { "epoch": 8.0, "eval_exact_match": 56.0, "eval_f1": 69.69766614851221, "eval_runtime": 45.2302, "eval_samples_per_second": 89.586, "eval_steps_per_second": 3.206, "step": 5520 }, { "epoch": 9.0, "step": 6210, "train_exact_match": 77.32267732267732, "train_f1": 88.72058931170982, "train_runtime": 14.7856, "train_samples_per_second": 87.653, "train_steps_per_second": 3.179 }, { "epoch": 9.0, "grad_norm": 5.172795295715332, "learning_rate": 1.25e-06, "loss": 0.6445, "step": 6210 }, { "epoch": 9.0, "eval_exact_match": 55.90625, "eval_f1": 69.57114295943043, "eval_runtime": 45.9508, "eval_samples_per_second": 88.181, "eval_steps_per_second": 3.156, "step": 6210 }, { "epoch": 10.0, "step": 6900, "train_exact_match": 80.81918081918081, "train_f1": 90.37663245353303, "train_runtime": 14.2722, "train_samples_per_second": 88.634, "train_steps_per_second": 3.223 }, { "epoch": 10.0, "grad_norm": 6.113234996795654, "learning_rate": 0.0, "loss": 0.6132, "step": 6900 }, { "epoch": 10.0, "eval_exact_match": 56.09375, "eval_f1": 69.86920835599972, "eval_runtime": 44.9954, "eval_samples_per_second": 90.054, "eval_steps_per_second": 3.223, "step": 6900 }, { "epoch": 10.0, "step": 6900, "total_flos": 3.780898745780736e+16, "train_loss": 1.4068151059358016, "train_runtime": 4389.3737, "train_samples_per_second": 43.954, "train_steps_per_second": 1.572 } ], "logging_steps": 500, "max_steps": 6900, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 3.780898745780736e+16, "train_batch_size": 28, "trial_name": null, "trial_params": null }