{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.999384615384617, "global_step": 8120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 0.3488372266292572, "eval_mse": 0.3488372266292572, "eval_runtime": 3.205, "eval_samples_per_second": 156.005, "eval_steps_per_second": 19.657, "step": 406 }, { "epoch": 1.23, "learning_rate": 1.1989617996694624e-05, "loss": 0.8208, "step": 500 }, { "epoch": 2.0, "eval_loss": 0.3489202857017517, "eval_mse": 0.3489202857017517, "eval_runtime": 3.0732, "eval_samples_per_second": 162.695, "eval_steps_per_second": 20.5, "step": 812 }, { "epoch": 2.46, "learning_rate": 1.1202897655704163e-05, "loss": 0.1606, "step": 1000 }, { "epoch": 3.0, "eval_loss": 0.3954257369041443, "eval_mse": 0.3954257369041443, "eval_runtime": 3.1411, "eval_samples_per_second": 159.182, "eval_steps_per_second": 20.057, "step": 1218 }, { "epoch": 3.69, "learning_rate": 1.0416177314713701e-05, "loss": 0.1174, "step": 1500 }, { "epoch": 4.0, "eval_loss": 0.22338131070137024, "eval_mse": 0.22338134050369263, "eval_runtime": 3.2318, "eval_samples_per_second": 154.71, "eval_steps_per_second": 19.493, "step": 1624 }, { "epoch": 4.93, "learning_rate": 9.62945697372324e-06, "loss": 0.0932, "step": 2000 }, { "epoch": 5.0, "eval_loss": 0.22499865293502808, "eval_mse": 0.22499865293502808, "eval_runtime": 3.0935, "eval_samples_per_second": 161.631, "eval_steps_per_second": 20.366, "step": 2030 }, { "epoch": 6.0, "eval_loss": 0.24710388481616974, "eval_mse": 0.24710386991500854, "eval_runtime": 3.1479, "eval_samples_per_second": 158.836, "eval_steps_per_second": 20.013, "step": 2436 }, { "epoch": 6.16, "learning_rate": 8.84273663273278e-06, "loss": 0.0729, "step": 2500 }, { "epoch": 7.0, "eval_loss": 0.16448178887367249, "eval_mse": 0.16448178887367249, "eval_runtime": 3.1325, "eval_samples_per_second": 159.618, "eval_steps_per_second": 20.112, "step": 2842 }, { "epoch": 7.39, "learning_rate": 8.056016291742319e-06, "loss": 0.0628, "step": 3000 }, { "epoch": 8.0, "eval_loss": 0.16940604150295258, "eval_mse": 0.16940604150295258, "eval_runtime": 3.1022, "eval_samples_per_second": 161.174, "eval_steps_per_second": 20.308, "step": 3248 }, { "epoch": 8.62, "learning_rate": 7.2692959507518586e-06, "loss": 0.0542, "step": 3500 }, { "epoch": 9.0, "eval_loss": 0.13145290315151215, "eval_mse": 0.13145291805267334, "eval_runtime": 3.1898, "eval_samples_per_second": 156.749, "eval_steps_per_second": 19.75, "step": 3654 }, { "epoch": 9.85, "learning_rate": 6.482575609761398e-06, "loss": 0.0477, "step": 4000 }, { "epoch": 10.0, "eval_loss": 0.16126562654972076, "eval_mse": 0.16126562654972076, "eval_runtime": 3.1805, "eval_samples_per_second": 157.209, "eval_steps_per_second": 19.808, "step": 4060 }, { "epoch": 11.0, "eval_loss": 0.13891386985778809, "eval_mse": 0.13891386985778809, "eval_runtime": 3.0894, "eval_samples_per_second": 161.843, "eval_steps_per_second": 20.392, "step": 4466 }, { "epoch": 11.08, "learning_rate": 5.695855268770936e-06, "loss": 0.0416, "step": 4500 }, { "epoch": 12.0, "eval_loss": 0.11093080043792725, "eval_mse": 0.11093080043792725, "eval_runtime": 3.178, "eval_samples_per_second": 157.331, "eval_steps_per_second": 19.824, "step": 4872 }, { "epoch": 12.32, "learning_rate": 4.909134927780475e-06, "loss": 0.0369, "step": 5000 }, { "epoch": 13.0, "eval_loss": 0.15438133478164673, "eval_mse": 0.15438130497932434, "eval_runtime": 3.1682, "eval_samples_per_second": 157.816, "eval_steps_per_second": 19.885, "step": 5278 }, { "epoch": 13.55, "learning_rate": 4.122414586790015e-06, "loss": 0.0337, "step": 5500 }, { "epoch": 14.0, "eval_loss": 0.11555473506450653, "eval_mse": 0.11555473506450653, "eval_runtime": 3.1267, "eval_samples_per_second": 159.911, "eval_steps_per_second": 20.149, "step": 5684 }, { "epoch": 14.78, "learning_rate": 3.335694245799554e-06, "loss": 0.0299, "step": 6000 }, { "epoch": 15.0, "eval_loss": 0.14125587046146393, "eval_mse": 0.14125585556030273, "eval_runtime": 3.1173, "eval_samples_per_second": 160.395, "eval_steps_per_second": 20.21, "step": 6090 }, { "epoch": 16.0, "eval_loss": 0.12824614346027374, "eval_mse": 0.12824612855911255, "eval_runtime": 3.141, "eval_samples_per_second": 159.185, "eval_steps_per_second": 20.057, "step": 6496 }, { "epoch": 16.01, "learning_rate": 2.5489739048090933e-06, "loss": 0.0288, "step": 6500 }, { "epoch": 17.0, "eval_loss": 0.11662615835666656, "eval_mse": 0.11662616580724716, "eval_runtime": 3.2355, "eval_samples_per_second": 154.536, "eval_steps_per_second": 19.471, "step": 6902 }, { "epoch": 17.24, "learning_rate": 1.7622535638186322e-06, "loss": 0.0263, "step": 7000 }, { "epoch": 18.0, "eval_loss": 0.12519659101963043, "eval_mse": 0.12519659101963043, "eval_runtime": 3.1453, "eval_samples_per_second": 158.968, "eval_steps_per_second": 20.03, "step": 7308 }, { "epoch": 18.47, "learning_rate": 9.755332228281715e-07, "loss": 0.0252, "step": 7500 }, { "epoch": 19.0, "eval_loss": 0.13828542828559875, "eval_mse": 0.13828542828559875, "eval_runtime": 3.121, "eval_samples_per_second": 160.203, "eval_steps_per_second": 20.186, "step": 7714 }, { "epoch": 19.7, "learning_rate": 1.8881288183771059e-07, "loss": 0.0236, "step": 8000 }, { "epoch": 20.0, "eval_loss": 0.11976799368858337, "eval_mse": 0.11976799368858337, "eval_runtime": 3.0971, "eval_samples_per_second": 161.439, "eval_steps_per_second": 20.341, "step": 8120 }, { "epoch": 20.0, "step": 8120, "total_flos": 8550922763068416.0, "train_loss": 0.10351273032832028, "train_runtime": 5208.5032, "train_samples_per_second": 24.959, "train_steps_per_second": 1.559 } ], "max_steps": 8120, "num_train_epochs": 20, "total_flos": 8550922763068416.0, "trial_name": null, "trial_params": null }