{
  "best_metric": 0.6618533730506897,
  "best_model_checkpoint": "miner_id_24/checkpoint-300",
  "epoch": 3.7854889589905363,
  "eval_steps": 150,
  "global_step": 600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.006309148264984227,
      "eval_loss": 1.5034849643707275,
      "eval_runtime": 12.1555,
      "eval_samples_per_second": 11.024,
      "eval_steps_per_second": 2.797,
      "step": 1
    },
    {
      "epoch": 0.31545741324921134,
      "grad_norm": 1.4248030185699463,
      "learning_rate": 0.0002,
      "loss": 4.546,
      "step": 50
    },
    {
      "epoch": 0.6309148264984227,
      "grad_norm": 1.4725761413574219,
      "learning_rate": 0.00019947686475910655,
      "loss": 3.352,
      "step": 100
    },
    {
      "epoch": 0.9463722397476341,
      "grad_norm": 2.3365869522094727,
      "learning_rate": 0.00019791293244603142,
      "loss": 3.0493,
      "step": 150
    },
    {
      "epoch": 0.9463722397476341,
      "eval_loss": 0.7383638024330139,
      "eval_runtime": 12.6059,
      "eval_samples_per_second": 10.63,
      "eval_steps_per_second": 2.697,
      "step": 150
    },
    {
      "epoch": 1.2618296529968454,
      "grad_norm": 1.8727226257324219,
      "learning_rate": 0.0001953245660229215,
      "loss": 2.5219,
      "step": 200
    },
    {
      "epoch": 1.5772870662460567,
      "grad_norm": 1.7076104879379272,
      "learning_rate": 0.0001917388468036222,
      "loss": 2.446,
      "step": 250
    },
    {
      "epoch": 1.8927444794952681,
      "grad_norm": 1.6955405473709106,
      "learning_rate": 0.00018719329110988486,
      "loss": 2.3597,
      "step": 300
    },
    {
      "epoch": 1.8927444794952681,
      "eval_loss": 0.6618533730506897,
      "eval_runtime": 12.5147,
      "eval_samples_per_second": 10.707,
      "eval_steps_per_second": 2.717,
      "step": 300
    },
    {
      "epoch": 2.2082018927444795,
      "grad_norm": 2.74511981010437,
      "learning_rate": 0.00018173545774916627,
      "loss": 1.9396,
      "step": 350
    },
    {
      "epoch": 2.5236593059936907,
      "grad_norm": 1.6998424530029297,
      "learning_rate": 0.0001754224504208647,
      "loss": 1.68,
      "step": 400
    },
    {
      "epoch": 2.8391167192429023,
      "grad_norm": 1.6890298128128052,
      "learning_rate": 0.00016832032025716921,
      "loss": 1.7147,
      "step": 450
    },
    {
      "epoch": 2.8391167192429023,
      "eval_loss": 0.6622503995895386,
      "eval_runtime": 12.5093,
      "eval_samples_per_second": 10.712,
      "eval_steps_per_second": 2.718,
      "step": 450
    },
    {
      "epoch": 3.1545741324921135,
      "grad_norm": 2.3724846839904785,
      "learning_rate": 0.00016050337474956067,
      "loss": 1.5111,
      "step": 500
    },
    {
      "epoch": 3.470031545741325,
      "grad_norm": 3.490664482116699,
      "learning_rate": 0.00015205340029146255,
      "loss": 1.0948,
      "step": 550
    },
    {
      "epoch": 3.7854889589905363,
      "grad_norm": 2.047959566116333,
      "learning_rate": 0.00014305880647134847,
      "loss": 1.179,
      "step": 600
    },
    {
      "epoch": 3.7854889589905363,
      "eval_loss": 0.690345287322998,
      "eval_runtime": 12.6156,
      "eval_samples_per_second": 10.622,
      "eval_steps_per_second": 2.695,
      "step": 600
    }
  ],
  "logging_steps": 50,
  "max_steps": 1585,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 11,
  "save_steps": 150,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 2,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 2
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6.548231660293325e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}