aleegis12's picture
Training in progress, step 600, checkpoint
b502a52 verified
{
"best_metric": 0.6618533730506897,
"best_model_checkpoint": "miner_id_24/checkpoint-300",
"epoch": 3.7854889589905363,
"eval_steps": 150,
"global_step": 600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.006309148264984227,
"eval_loss": 1.5034849643707275,
"eval_runtime": 12.1555,
"eval_samples_per_second": 11.024,
"eval_steps_per_second": 2.797,
"step": 1
},
{
"epoch": 0.31545741324921134,
"grad_norm": 1.4248030185699463,
"learning_rate": 0.0002,
"loss": 4.546,
"step": 50
},
{
"epoch": 0.6309148264984227,
"grad_norm": 1.4725761413574219,
"learning_rate": 0.00019947686475910655,
"loss": 3.352,
"step": 100
},
{
"epoch": 0.9463722397476341,
"grad_norm": 2.3365869522094727,
"learning_rate": 0.00019791293244603142,
"loss": 3.0493,
"step": 150
},
{
"epoch": 0.9463722397476341,
"eval_loss": 0.7383638024330139,
"eval_runtime": 12.6059,
"eval_samples_per_second": 10.63,
"eval_steps_per_second": 2.697,
"step": 150
},
{
"epoch": 1.2618296529968454,
"grad_norm": 1.8727226257324219,
"learning_rate": 0.0001953245660229215,
"loss": 2.5219,
"step": 200
},
{
"epoch": 1.5772870662460567,
"grad_norm": 1.7076104879379272,
"learning_rate": 0.0001917388468036222,
"loss": 2.446,
"step": 250
},
{
"epoch": 1.8927444794952681,
"grad_norm": 1.6955405473709106,
"learning_rate": 0.00018719329110988486,
"loss": 2.3597,
"step": 300
},
{
"epoch": 1.8927444794952681,
"eval_loss": 0.6618533730506897,
"eval_runtime": 12.5147,
"eval_samples_per_second": 10.707,
"eval_steps_per_second": 2.717,
"step": 300
},
{
"epoch": 2.2082018927444795,
"grad_norm": 2.74511981010437,
"learning_rate": 0.00018173545774916627,
"loss": 1.9396,
"step": 350
},
{
"epoch": 2.5236593059936907,
"grad_norm": 1.6998424530029297,
"learning_rate": 0.0001754224504208647,
"loss": 1.68,
"step": 400
},
{
"epoch": 2.8391167192429023,
"grad_norm": 1.6890298128128052,
"learning_rate": 0.00016832032025716921,
"loss": 1.7147,
"step": 450
},
{
"epoch": 2.8391167192429023,
"eval_loss": 0.6622503995895386,
"eval_runtime": 12.5093,
"eval_samples_per_second": 10.712,
"eval_steps_per_second": 2.718,
"step": 450
},
{
"epoch": 3.1545741324921135,
"grad_norm": 2.3724846839904785,
"learning_rate": 0.00016050337474956067,
"loss": 1.5111,
"step": 500
},
{
"epoch": 3.470031545741325,
"grad_norm": 3.490664482116699,
"learning_rate": 0.00015205340029146255,
"loss": 1.0948,
"step": 550
},
{
"epoch": 3.7854889589905363,
"grad_norm": 2.047959566116333,
"learning_rate": 0.00014305880647134847,
"loss": 1.179,
"step": 600
},
{
"epoch": 3.7854889589905363,
"eval_loss": 0.690345287322998,
"eval_runtime": 12.6156,
"eval_samples_per_second": 10.622,
"eval_steps_per_second": 2.695,
"step": 600
}
],
"logging_steps": 50,
"max_steps": 1585,
"num_input_tokens_seen": 0,
"num_train_epochs": 11,
"save_steps": 150,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 2,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 2
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.548231660293325e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}