{ "best_metric": 0.87736, "best_model_checkpoint": "outputs/checkpoint-702", "epoch": 3.0, "eval_steps": 78, "global_step": 1173, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.6949152542372883e-07, "loss": 0.6918, "step": 1 }, { "epoch": 0.2, "learning_rate": 1.3220338983050848e-05, "loss": 0.6487, "step": 78 }, { "epoch": 0.2, "eval_accuracy": 0.81168, "eval_loss": 0.4401787221431732, "eval_runtime": 30.2854, "eval_samples_per_second": 825.48, "eval_steps_per_second": 12.911, "step": 78 }, { "epoch": 0.4, "learning_rate": 1.9279620853080568e-05, "loss": 0.402, "step": 156 }, { "epoch": 0.4, "eval_accuracy": 0.8416, "eval_loss": 0.356289267539978, "eval_runtime": 30.4052, "eval_samples_per_second": 822.228, "eval_steps_per_second": 12.86, "step": 156 }, { "epoch": 0.6, "learning_rate": 1.780094786729858e-05, "loss": 0.3528, "step": 234 }, { "epoch": 0.6, "eval_accuracy": 0.8434, "eval_loss": 0.3521649241447449, "eval_runtime": 30.6429, "eval_samples_per_second": 815.849, "eval_steps_per_second": 12.76, "step": 234 }, { "epoch": 0.8, "learning_rate": 1.632227488151659e-05, "loss": 0.3362, "step": 312 }, { "epoch": 0.8, "eval_accuracy": 0.86516, "eval_loss": 0.30993545055389404, "eval_runtime": 30.6544, "eval_samples_per_second": 815.543, "eval_steps_per_second": 12.755, "step": 312 }, { "epoch": 1.0, "learning_rate": 1.4843601895734598e-05, "loss": 0.3184, "step": 390 }, { "epoch": 1.0, "eval_accuracy": 0.86996, "eval_loss": 0.30280688405036926, "eval_runtime": 30.6512, "eval_samples_per_second": 815.629, "eval_steps_per_second": 12.756, "step": 390 }, { "epoch": 1.2, "learning_rate": 1.3364928909952607e-05, "loss": 0.265, "step": 468 }, { "epoch": 1.2, "eval_accuracy": 0.8738, "eval_loss": 0.305215448141098, "eval_runtime": 30.5331, "eval_samples_per_second": 818.783, "eval_steps_per_second": 12.806, "step": 468 }, { "epoch": 1.4, "learning_rate": 1.1886255924170618e-05, "loss": 0.2593, "step": 546 }, { "epoch": 1.4, "eval_accuracy": 0.87348, "eval_loss": 0.29833072423934937, "eval_runtime": 30.6351, "eval_samples_per_second": 816.057, "eval_steps_per_second": 12.763, "step": 546 }, { "epoch": 1.6, "learning_rate": 1.0407582938388628e-05, "loss": 0.2537, "step": 624 }, { "epoch": 1.6, "eval_accuracy": 0.87536, "eval_loss": 0.2977478802204132, "eval_runtime": 30.5918, "eval_samples_per_second": 817.213, "eval_steps_per_second": 12.781, "step": 624 }, { "epoch": 1.8, "learning_rate": 8.928909952606636e-06, "loss": 0.2558, "step": 702 }, { "epoch": 1.8, "eval_accuracy": 0.87736, "eval_loss": 0.29114434123039246, "eval_runtime": 30.9298, "eval_samples_per_second": 808.283, "eval_steps_per_second": 12.642, "step": 702 }, { "epoch": 1.99, "learning_rate": 7.450236966824646e-06, "loss": 0.2476, "step": 780 }, { "epoch": 1.99, "eval_accuracy": 0.87508, "eval_loss": 0.2907171845436096, "eval_runtime": 30.4929, "eval_samples_per_second": 819.862, "eval_steps_per_second": 12.823, "step": 780 }, { "epoch": 2.19, "learning_rate": 5.971563981042654e-06, "loss": 0.1941, "step": 858 }, { "epoch": 2.19, "eval_accuracy": 0.87736, "eval_loss": 0.3151108920574188, "eval_runtime": 30.5003, "eval_samples_per_second": 819.664, "eval_steps_per_second": 12.82, "step": 858 }, { "epoch": 2.39, "learning_rate": 4.492890995260664e-06, "loss": 0.1873, "step": 936 }, { "epoch": 2.39, "eval_accuracy": 0.87644, "eval_loss": 0.31038883328437805, "eval_runtime": 30.5889, "eval_samples_per_second": 817.29, "eval_steps_per_second": 12.782, "step": 936 }, { "epoch": 2.59, "learning_rate": 3.0142180094786734e-06, "loss": 0.1869, "step": 1014 }, { "epoch": 2.59, "eval_accuracy": 0.87708, "eval_loss": 0.3180868625640869, "eval_runtime": 30.5304, "eval_samples_per_second": 818.855, "eval_steps_per_second": 12.807, "step": 1014 }, { "epoch": 2.79, "learning_rate": 1.5355450236966826e-06, "loss": 0.1807, "step": 1092 }, { "epoch": 2.79, "eval_accuracy": 0.87636, "eval_loss": 0.3148181140422821, "eval_runtime": 30.541, "eval_samples_per_second": 818.572, "eval_steps_per_second": 12.802, "step": 1092 }, { "epoch": 2.99, "learning_rate": 5.6872037914691944e-08, "loss": 0.1967, "step": 1170 }, { "epoch": 2.99, "eval_accuracy": 0.8766, "eval_loss": 0.3140537142753601, "eval_runtime": 30.4059, "eval_samples_per_second": 822.21, "eval_steps_per_second": 12.859, "step": 1170 }, { "epoch": 3.0, "step": 1173, "total_flos": 2483763724800000.0, "train_loss": 0.2856195556334929, "train_runtime": 750.4873, "train_samples_per_second": 99.935, "train_steps_per_second": 1.563 } ], "logging_steps": 78, "max_steps": 1173, "num_train_epochs": 3, "save_steps": 78, "total_flos": 2483763724800000.0, "trial_name": null, "trial_params": null }