{ "best_metric": 0.8285950217539291, "best_model_checkpoint": "/content/temp_assamese/checkpoint-50000", "epoch": 1.0, "eval_steps": 5000, "global_step": 53713, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09308733453726287, "grad_norm": 5.457111358642578, "learning_rate": 4.5348425893172976e-05, "loss": 2.4466, "step": 5000 }, { "epoch": 0.09308733453726287, "eval_accuracy": 0.707468341537135, "eval_loss": 1.5004358291625977, "eval_runtime": 308.6127, "eval_samples_per_second": 146.822, "eval_steps_per_second": 9.177, "step": 5000 }, { "epoch": 0.18617466907452573, "grad_norm": 4.776674270629883, "learning_rate": 4.0694990039655205e-05, "loss": 1.4994, "step": 10000 }, { "epoch": 0.18617466907452573, "eval_accuracy": 0.7532453087820641, "eval_loss": 1.2256046533584595, "eval_runtime": 306.864, "eval_samples_per_second": 147.658, "eval_steps_per_second": 9.229, "step": 10000 }, { "epoch": 0.2792620036117886, "grad_norm": 3.9309194087982178, "learning_rate": 3.604248505948281e-05, "loss": 1.2888, "step": 15000 }, { "epoch": 0.2792620036117886, "eval_accuracy": 0.7765668816186476, "eval_loss": 1.099416732788086, "eval_runtime": 321.8088, "eval_samples_per_second": 140.801, "eval_steps_per_second": 8.8, "step": 15000 }, { "epoch": 0.37234933814905147, "grad_norm": 3.988945722579956, "learning_rate": 3.1389980079310413e-05, "loss": 1.1746, "step": 20000 }, { "epoch": 0.37234933814905147, "eval_accuracy": 0.791549800931217, "eval_loss": 1.0090231895446777, "eval_runtime": 316.5344, "eval_samples_per_second": 143.147, "eval_steps_per_second": 8.947, "step": 20000 }, { "epoch": 0.4654366726863143, "grad_norm": 4.230010509490967, "learning_rate": 2.6736544225792642e-05, "loss": 1.0994, "step": 25000 }, { "epoch": 0.4654366726863143, "eval_accuracy": 0.8021278468205446, "eval_loss": 0.9513992667198181, "eval_runtime": 317.1699, "eval_samples_per_second": 142.86, "eval_steps_per_second": 8.929, "step": 25000 }, { "epoch": 0.5585240072235772, "grad_norm": 4.287986755371094, "learning_rate": 2.2084970118965616e-05, "loss": 1.0379, "step": 30000 }, { "epoch": 0.5585240072235772, "eval_accuracy": 0.8115293649487124, "eval_loss": 0.9028974771499634, "eval_runtime": 316.9066, "eval_samples_per_second": 142.979, "eval_steps_per_second": 8.936, "step": 30000 }, { "epoch": 0.65161134176084, "grad_norm": 3.521850347518921, "learning_rate": 1.743339601213859e-05, "loss": 0.9956, "step": 35000 }, { "epoch": 0.65161134176084, "eval_accuracy": 0.8174002465681974, "eval_loss": 0.8695101737976074, "eval_runtime": 316.9452, "eval_samples_per_second": 142.962, "eval_steps_per_second": 8.935, "step": 35000 }, { "epoch": 0.7446986762981029, "grad_norm": 4.046538829803467, "learning_rate": 1.2779960158620818e-05, "loss": 0.9647, "step": 40000 }, { "epoch": 0.7446986762981029, "eval_accuracy": 0.8216175421669631, "eval_loss": 0.8461592793464661, "eval_runtime": 318.3007, "eval_samples_per_second": 142.353, "eval_steps_per_second": 8.897, "step": 40000 }, { "epoch": 0.8377860108353657, "grad_norm": 4.023233413696289, "learning_rate": 8.12745517844842e-06, "loss": 0.9351, "step": 45000 }, { "epoch": 0.8377860108353657, "eval_accuracy": 0.8258444821249434, "eval_loss": 0.8274036645889282, "eval_runtime": 318.395, "eval_samples_per_second": 142.311, "eval_steps_per_second": 8.895, "step": 45000 }, { "epoch": 0.9308733453726286, "grad_norm": 3.7155344486236572, "learning_rate": 3.4749501982760224e-06, "loss": 0.9194, "step": 50000 }, { "epoch": 0.9308733453726286, "eval_accuracy": 0.8285950217539291, "eval_loss": 0.8120360374450684, "eval_runtime": 309.307, "eval_samples_per_second": 146.492, "eval_steps_per_second": 9.156, "step": 50000 }, { "epoch": 1.0, "step": 53713, "total_flos": 1.1339138340497818e+17, "train_loss": 1.2134282816267128, "train_runtime": 14742.2291, "train_samples_per_second": 58.295, "train_steps_per_second": 3.643 } ], "logging_steps": 5000, "max_steps": 53713, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1339138340497818e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }