|
{ |
|
"best_metric": 1.3815141916275024, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-2000", |
|
"epoch": 0.32159511175430133, |
|
"eval_steps": 500, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00016079755587715066, |
|
"eval_loss": 1.9122439622879028, |
|
"eval_runtime": 431.3676, |
|
"eval_samples_per_second": 24.283, |
|
"eval_steps_per_second": 6.071, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.008039877793857533, |
|
"grad_norm": 1.275758147239685, |
|
"learning_rate": 0.000106, |
|
"loss": 1.7445, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.016079755587715065, |
|
"grad_norm": 1.1742221117019653, |
|
"learning_rate": 0.000212, |
|
"loss": 1.6522, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0241196333815726, |
|
"grad_norm": 0.917005717754364, |
|
"learning_rate": 0.00021163795625870702, |
|
"loss": 1.6312, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03215951117543013, |
|
"grad_norm": 0.8962546586990356, |
|
"learning_rate": 0.00021055429816068858, |
|
"loss": 1.5945, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04019938896928767, |
|
"grad_norm": 0.9346632361412048, |
|
"learning_rate": 0.00020875642818956903, |
|
"loss": 1.5393, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.0482392667631452, |
|
"grad_norm": 0.7735784649848938, |
|
"learning_rate": 0.00020625662762026727, |
|
"loss": 1.5295, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05627914455700273, |
|
"grad_norm": 0.748665988445282, |
|
"learning_rate": 0.0002030719726254361, |
|
"loss": 1.5226, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.06431902235086026, |
|
"grad_norm": 0.8975337743759155, |
|
"learning_rate": 0.00019922421762788784, |
|
"loss": 1.511, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0723589001447178, |
|
"grad_norm": 0.8698158860206604, |
|
"learning_rate": 0.00019473964669582803, |
|
"loss": 1.5249, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.08039877793857533, |
|
"grad_norm": 0.792420506477356, |
|
"learning_rate": 0.00018964889399601773, |
|
"loss": 1.488, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08039877793857533, |
|
"eval_loss": 1.5327790975570679, |
|
"eval_runtime": 431.164, |
|
"eval_samples_per_second": 24.295, |
|
"eval_steps_per_second": 6.074, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08843865573243287, |
|
"grad_norm": 0.7909814715385437, |
|
"learning_rate": 0.00018398673453135197, |
|
"loss": 1.474, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.0964785335262904, |
|
"grad_norm": 0.7088127136230469, |
|
"learning_rate": 0.00017779184659232858, |
|
"loss": 1.483, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.10451841132014793, |
|
"grad_norm": 0.7714433670043945, |
|
"learning_rate": 0.0001711065475451048, |
|
"loss": 1.4852, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.11255828911400546, |
|
"grad_norm": 1.0182313919067383, |
|
"learning_rate": 0.00016397650476097727, |
|
"loss": 1.4443, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.120598166907863, |
|
"grad_norm": 0.7473872900009155, |
|
"learning_rate": 0.00015645042366192982, |
|
"loss": 1.452, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.12863804470172052, |
|
"grad_norm": 0.789179265499115, |
|
"learning_rate": 0.0001485797150132148, |
|
"loss": 1.4872, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.13667792249557806, |
|
"grad_norm": 0.8989229202270508, |
|
"learning_rate": 0.00014041814373569648, |
|
"loss": 1.4354, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.1447178002894356, |
|
"grad_norm": 0.7713472843170166, |
|
"learning_rate": 0.00013202146163692472, |
|
"loss": 1.4508, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.15275767808329313, |
|
"grad_norm": 0.7908226847648621, |
|
"learning_rate": 0.0001234470265697578, |
|
"loss": 1.4426, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.16079755587715067, |
|
"grad_norm": 0.792984127998352, |
|
"learning_rate": 0.00011475341062006725, |
|
"loss": 1.4087, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16079755587715067, |
|
"eval_loss": 1.4277774095535278, |
|
"eval_runtime": 431.2474, |
|
"eval_samples_per_second": 24.29, |
|
"eval_steps_per_second": 6.073, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1688374336710082, |
|
"grad_norm": 0.7347446084022522, |
|
"learning_rate": 0.000106, |
|
"loss": 1.4432, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.17687731146486574, |
|
"grad_norm": 0.8061316013336182, |
|
"learning_rate": 9.724658937993278e-05, |
|
"loss": 1.4889, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.18491718925872327, |
|
"grad_norm": 0.9965606927871704, |
|
"learning_rate": 8.855297343024219e-05, |
|
"loss": 1.4394, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.1929570670525808, |
|
"grad_norm": 0.6851484179496765, |
|
"learning_rate": 7.99785383630753e-05, |
|
"loss": 1.416, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.20099694484643835, |
|
"grad_norm": 0.7076012492179871, |
|
"learning_rate": 7.158185626430357e-05, |
|
"loss": 1.4108, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.20903682264029586, |
|
"grad_norm": 0.818530797958374, |
|
"learning_rate": 6.342028498678525e-05, |
|
"loss": 1.4205, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.2170767004341534, |
|
"grad_norm": 0.7673355937004089, |
|
"learning_rate": 5.5549576338070204e-05, |
|
"loss": 1.4441, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.22511657822801093, |
|
"grad_norm": 0.7837012410163879, |
|
"learning_rate": 4.802349523902277e-05, |
|
"loss": 1.4046, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.23315645602186846, |
|
"grad_norm": 0.9885613918304443, |
|
"learning_rate": 4.0893452454895215e-05, |
|
"loss": 1.3865, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.241196333815726, |
|
"grad_norm": 0.6985939145088196, |
|
"learning_rate": 3.420815340767147e-05, |
|
"loss": 1.3805, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.241196333815726, |
|
"eval_loss": 1.3898102045059204, |
|
"eval_runtime": 431.5472, |
|
"eval_samples_per_second": 24.273, |
|
"eval_steps_per_second": 6.069, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.24923621160958354, |
|
"grad_norm": 0.6956173777580261, |
|
"learning_rate": 2.8013265468648052e-05, |
|
"loss": 1.398, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.25727608940344104, |
|
"grad_norm": 0.8307960629463196, |
|
"learning_rate": 2.2351106003982295e-05, |
|
"loss": 1.38, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2653159671972986, |
|
"grad_norm": 0.6814916729927063, |
|
"learning_rate": 1.7260353304171974e-05, |
|
"loss": 1.4301, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.2733558449911561, |
|
"grad_norm": 0.8441957235336304, |
|
"learning_rate": 1.277578237211217e-05, |
|
"loss": 1.422, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.2813957227850137, |
|
"grad_norm": 0.7750621438026428, |
|
"learning_rate": 8.928027374563904e-06, |
|
"loss": 1.3458, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.2894356005788712, |
|
"grad_norm": 0.7339411973953247, |
|
"learning_rate": 5.743372379732728e-06, |
|
"loss": 1.3787, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.29747547837272875, |
|
"grad_norm": 0.7997561693191528, |
|
"learning_rate": 3.2435718104309803e-06, |
|
"loss": 1.3861, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.30551535616658626, |
|
"grad_norm": 0.7548067569732666, |
|
"learning_rate": 1.4457018393114339e-06, |
|
"loss": 1.3984, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.3135552339604438, |
|
"grad_norm": 0.824309766292572, |
|
"learning_rate": 3.620437412929962e-07, |
|
"loss": 1.3905, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.32159511175430133, |
|
"grad_norm": 0.7960425019264221, |
|
"learning_rate": 0.0, |
|
"loss": 1.3743, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.32159511175430133, |
|
"eval_loss": 1.3815141916275024, |
|
"eval_runtime": 431.4937, |
|
"eval_samples_per_second": 24.276, |
|
"eval_steps_per_second": 6.07, |
|
"step": 2000 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 2000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.4428946137088e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|