|
{ |
|
"best_metric": 0.09226811677217484, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-500", |
|
"epoch": 0.29459419649432905, |
|
"eval_steps": 500, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0005891883929886581, |
|
"eval_loss": 1.4878911972045898, |
|
"eval_runtime": 302.6677, |
|
"eval_samples_per_second": 9.446, |
|
"eval_steps_per_second": 2.362, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.029459419649432907, |
|
"grad_norm": 17.25750160217285, |
|
"learning_rate": 0.0001035, |
|
"loss": 3.0964, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.058918839298865815, |
|
"grad_norm": 15.095850944519043, |
|
"learning_rate": 0.000207, |
|
"loss": 1.9876, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08837825894829872, |
|
"grad_norm": 13.579780578613281, |
|
"learning_rate": 0.00019912153161491818, |
|
"loss": 1.9922, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11783767859773163, |
|
"grad_norm": 11.583715438842773, |
|
"learning_rate": 0.00017668555185280767, |
|
"loss": 1.6798, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.14729709824716453, |
|
"grad_norm": 12.069087982177734, |
|
"learning_rate": 0.0001431077352497868, |
|
"loss": 1.5442, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.17675651789659744, |
|
"grad_norm": 11.045920372009277, |
|
"learning_rate": 0.0001035, |
|
"loss": 1.2986, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.20621593754603035, |
|
"grad_norm": 8.735687255859375, |
|
"learning_rate": 6.389226475021321e-05, |
|
"loss": 1.0951, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23567535719546326, |
|
"grad_norm": 10.485920906066895, |
|
"learning_rate": 3.0314448147192338e-05, |
|
"loss": 0.8972, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26513477684489617, |
|
"grad_norm": 6.716992378234863, |
|
"learning_rate": 7.878468385081822e-06, |
|
"loss": 0.8396, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.29459419649432905, |
|
"grad_norm": 8.605260848999023, |
|
"learning_rate": 0.0, |
|
"loss": 0.7214, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.29459419649432905, |
|
"eval_loss": 0.09226811677217484, |
|
"eval_runtime": 302.2865, |
|
"eval_samples_per_second": 9.458, |
|
"eval_steps_per_second": 2.365, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.159871941996708e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|