|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.9984, |
|
"global_step": 6240, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.2721790075302124, |
|
"eval_mse": 0.2721790075302124, |
|
"eval_runtime": 13.7261, |
|
"eval_samples_per_second": 72.854, |
|
"eval_steps_per_second": 9.107, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3.4640443499072686e-05, |
|
"loss": 0.2992, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.24081240594387054, |
|
"eval_mse": 0.24081240594387054, |
|
"eval_runtime": 13.7912, |
|
"eval_samples_per_second": 72.51, |
|
"eval_steps_per_second": 9.064, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.22090023756027222, |
|
"eval_mse": 0.22090023756027222, |
|
"eval_runtime": 13.7775, |
|
"eval_samples_per_second": 72.582, |
|
"eval_steps_per_second": 9.073, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3.1622983263961825e-05, |
|
"loss": 0.1095, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.18391327559947968, |
|
"eval_mse": 0.18391327559947968, |
|
"eval_runtime": 13.7422, |
|
"eval_samples_per_second": 72.769, |
|
"eval_steps_per_second": 9.096, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 2.860552302885096e-05, |
|
"loss": 0.0676, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.1835608184337616, |
|
"eval_mse": 0.1835608184337616, |
|
"eval_runtime": 13.7549, |
|
"eval_samples_per_second": 72.702, |
|
"eval_steps_per_second": 9.088, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.1830635964870453, |
|
"eval_mse": 0.1830635964870453, |
|
"eval_runtime": 13.799, |
|
"eval_samples_per_second": 72.469, |
|
"eval_steps_per_second": 9.059, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 2.5588062793740103e-05, |
|
"loss": 0.0507, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.18448784947395325, |
|
"eval_mse": 0.18448786437511444, |
|
"eval_runtime": 13.7961, |
|
"eval_samples_per_second": 72.484, |
|
"eval_steps_per_second": 9.061, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.18527822196483612, |
|
"eval_mse": 0.18527822196483612, |
|
"eval_runtime": 13.7973, |
|
"eval_samples_per_second": 72.478, |
|
"eval_steps_per_second": 9.06, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 2.257060255862924e-05, |
|
"loss": 0.0402, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.18943239748477936, |
|
"eval_mse": 0.18943239748477936, |
|
"eval_runtime": 13.7202, |
|
"eval_samples_per_second": 72.885, |
|
"eval_steps_per_second": 9.111, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 1.9553142323518382e-05, |
|
"loss": 0.0327, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.17714717984199524, |
|
"eval_mse": 0.17714717984199524, |
|
"eval_runtime": 13.7644, |
|
"eval_samples_per_second": 72.651, |
|
"eval_steps_per_second": 9.081, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.18594859540462494, |
|
"eval_mse": 0.18594858050346375, |
|
"eval_runtime": 13.6617, |
|
"eval_samples_per_second": 73.197, |
|
"eval_steps_per_second": 9.15, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 1.6535682088407518e-05, |
|
"loss": 0.0268, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.18873579800128937, |
|
"eval_mse": 0.18873581290245056, |
|
"eval_runtime": 13.705, |
|
"eval_samples_per_second": 72.966, |
|
"eval_steps_per_second": 9.121, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 1.3518221853296657e-05, |
|
"loss": 0.0211, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.19864311814308167, |
|
"eval_mse": 0.19864313304424286, |
|
"eval_runtime": 13.7757, |
|
"eval_samples_per_second": 72.592, |
|
"eval_steps_per_second": 9.074, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.17751657962799072, |
|
"eval_mse": 0.17751657962799072, |
|
"eval_runtime": 13.7818, |
|
"eval_samples_per_second": 72.56, |
|
"eval_steps_per_second": 9.07, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 1.0500761618185796e-05, |
|
"loss": 0.0161, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.1854146122932434, |
|
"eval_mse": 0.1854146122932434, |
|
"eval_runtime": 13.8239, |
|
"eval_samples_per_second": 72.339, |
|
"eval_steps_per_second": 9.042, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.1844572126865387, |
|
"eval_mse": 0.1844572275876999, |
|
"eval_runtime": 13.7435, |
|
"eval_samples_per_second": 72.762, |
|
"eval_steps_per_second": 9.095, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 7.483301383074935e-06, |
|
"loss": 0.0126, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.18561576306819916, |
|
"eval_mse": 0.18561576306819916, |
|
"eval_runtime": 13.7414, |
|
"eval_samples_per_second": 72.773, |
|
"eval_steps_per_second": 9.097, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 4.465841147964074e-06, |
|
"loss": 0.01, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.17724011838436127, |
|
"eval_mse": 0.17724011838436127, |
|
"eval_runtime": 13.7177, |
|
"eval_samples_per_second": 72.898, |
|
"eval_steps_per_second": 9.112, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.18078790605068207, |
|
"eval_mse": 0.18078790605068207, |
|
"eval_runtime": 13.8059, |
|
"eval_samples_per_second": 72.433, |
|
"eval_steps_per_second": 9.054, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 1.4483809128532133e-06, |
|
"loss": 0.008, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.18189798295497894, |
|
"eval_mse": 0.18189798295497894, |
|
"eval_runtime": 13.7884, |
|
"eval_samples_per_second": 72.524, |
|
"eval_steps_per_second": 9.066, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 6240, |
|
"total_flos": 2.3296341506770944e+16, |
|
"train_loss": 0.05592835318201628, |
|
"train_runtime": 6478.1274, |
|
"train_samples_per_second": 15.437, |
|
"train_steps_per_second": 0.963 |
|
} |
|
], |
|
"max_steps": 6240, |
|
"num_train_epochs": 20, |
|
"total_flos": 2.3296341506770944e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|