|
{ |
|
"best_metric": 1.0038142204284668, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-800", |
|
"epoch": 0.6780379277465833, |
|
"eval_steps": 100, |
|
"global_step": 800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0008475474096832291, |
|
"eval_loss": 1.7017370462417603, |
|
"eval_runtime": 592.1509, |
|
"eval_samples_per_second": 3.357, |
|
"eval_steps_per_second": 0.839, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04237737048416146, |
|
"grad_norm": 180.50335693359375, |
|
"learning_rate": 0.0001085, |
|
"loss": 9.6737, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08475474096832292, |
|
"grad_norm": 35.1514778137207, |
|
"learning_rate": 0.000217, |
|
"loss": 9.1546, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08475474096832292, |
|
"eval_loss": 1.4327971935272217, |
|
"eval_runtime": 591.9001, |
|
"eval_samples_per_second": 3.359, |
|
"eval_steps_per_second": 0.84, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12713211145248438, |
|
"grad_norm": 33.78520584106445, |
|
"learning_rate": 0.00021427967847172783, |
|
"loss": 9.1724, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16950948193664583, |
|
"grad_norm": 30.819656372070312, |
|
"learning_rate": 0.00020625512216741247, |
|
"loss": 9.0184, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.16950948193664583, |
|
"eval_loss": 1.431421160697937, |
|
"eval_runtime": 593.9151, |
|
"eval_samples_per_second": 3.347, |
|
"eval_steps_per_second": 0.837, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2118868524208073, |
|
"grad_norm": 31.55472183227539, |
|
"learning_rate": 0.00019332871584778124, |
|
"loss": 9.0235, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.25426422290496875, |
|
"grad_norm": 39.07125473022461, |
|
"learning_rate": 0.0001761486435016726, |
|
"loss": 9.0832, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.25426422290496875, |
|
"eval_loss": 1.3427064418792725, |
|
"eval_runtime": 594.1739, |
|
"eval_samples_per_second": 3.346, |
|
"eval_steps_per_second": 0.836, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2966415933891302, |
|
"grad_norm": 29.832670211791992, |
|
"learning_rate": 0.00015557638569425503, |
|
"loss": 8.9316, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.33901896387329167, |
|
"grad_norm": 31.09575653076172, |
|
"learning_rate": 0.0001326435213342601, |
|
"loss": 8.8142, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.33901896387329167, |
|
"eval_loss": 1.2957122325897217, |
|
"eval_runtime": 592.8915, |
|
"eval_samples_per_second": 3.353, |
|
"eval_steps_per_second": 0.838, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3813963343574531, |
|
"grad_norm": 23.03346061706543, |
|
"learning_rate": 0.0001085, |
|
"loss": 8.8109, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.4237737048416146, |
|
"grad_norm": 32.9949951171875, |
|
"learning_rate": 8.435647866573989e-05, |
|
"loss": 8.6298, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4237737048416146, |
|
"eval_loss": 1.1177974939346313, |
|
"eval_runtime": 592.3944, |
|
"eval_samples_per_second": 3.356, |
|
"eval_steps_per_second": 0.839, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.46615107532577604, |
|
"grad_norm": 25.86236000061035, |
|
"learning_rate": 6.142361430574494e-05, |
|
"loss": 8.3883, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5085284458099375, |
|
"grad_norm": 25.12655258178711, |
|
"learning_rate": 4.085135649832741e-05, |
|
"loss": 8.4129, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5085284458099375, |
|
"eval_loss": 1.041265606880188, |
|
"eval_runtime": 593.3051, |
|
"eval_samples_per_second": 3.351, |
|
"eval_steps_per_second": 0.838, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.550905816294099, |
|
"grad_norm": 45.71076965332031, |
|
"learning_rate": 2.3671284152218764e-05, |
|
"loss": 8.1111, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5932831867782604, |
|
"grad_norm": 22.455829620361328, |
|
"learning_rate": 1.0744877832587534e-05, |
|
"loss": 8.1041, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5932831867782604, |
|
"eval_loss": 1.0079442262649536, |
|
"eval_runtime": 591.8525, |
|
"eval_samples_per_second": 3.359, |
|
"eval_steps_per_second": 0.84, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6356605572624219, |
|
"grad_norm": 25.23514747619629, |
|
"learning_rate": 2.720321528272137e-06, |
|
"loss": 8.1489, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.6780379277465833, |
|
"grad_norm": 25.03769302368164, |
|
"learning_rate": 0.0, |
|
"loss": 8.0241, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6780379277465833, |
|
"eval_loss": 1.0038142204284668, |
|
"eval_runtime": 594.1032, |
|
"eval_samples_per_second": 3.346, |
|
"eval_steps_per_second": 0.837, |
|
"step": 800 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 800, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.413778734304461e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|