|
{ |
|
"best_metric": 0.26675283908843994, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-500", |
|
"epoch": 0.24394706348722328, |
|
"eval_steps": 500, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00048789412697444653, |
|
"eval_loss": 0.41620752215385437, |
|
"eval_runtime": 378.5304, |
|
"eval_samples_per_second": 9.122, |
|
"eval_steps_per_second": 2.283, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.024394706348722327, |
|
"grad_norm": 0.791959822177887, |
|
"learning_rate": 0.000109, |
|
"loss": 0.3028, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.048789412697444655, |
|
"grad_norm": 0.7407991886138916, |
|
"learning_rate": 0.000218, |
|
"loss": 0.2629, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07318411904616698, |
|
"grad_norm": 0.8837946653366089, |
|
"learning_rate": 0.00020970286904373027, |
|
"loss": 0.2861, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09757882539488931, |
|
"grad_norm": 0.6867163181304932, |
|
"learning_rate": 0.00018607463914933369, |
|
"loss": 0.27, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12197353174361164, |
|
"grad_norm": 0.7695136070251465, |
|
"learning_rate": 0.0001507124941277948, |
|
"loss": 0.2802, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14636823809233396, |
|
"grad_norm": 0.796603798866272, |
|
"learning_rate": 0.000109, |
|
"loss": 0.2849, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1707629444410563, |
|
"grad_norm": 0.6176795363426208, |
|
"learning_rate": 6.728750587220522e-05, |
|
"loss": 0.2807, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19515765078977862, |
|
"grad_norm": 0.6259704828262329, |
|
"learning_rate": 3.192536085066633e-05, |
|
"loss": 0.2805, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21955235713850094, |
|
"grad_norm": 0.8086729049682617, |
|
"learning_rate": 8.297130956269746e-06, |
|
"loss": 0.2778, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24394706348722328, |
|
"grad_norm": 0.6864873170852661, |
|
"learning_rate": 0.0, |
|
"loss": 0.2724, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.24394706348722328, |
|
"eval_loss": 0.26675283908843994, |
|
"eval_runtime": 378.5603, |
|
"eval_samples_per_second": 9.121, |
|
"eval_steps_per_second": 2.282, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.54269674274816e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|