{ "best_metric": 1.3815141916275024, "best_model_checkpoint": "miner_id_24/checkpoint-2000", "epoch": 0.32159511175430133, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016079755587715066, "eval_loss": 1.9122439622879028, "eval_runtime": 431.3676, "eval_samples_per_second": 24.283, "eval_steps_per_second": 6.071, "step": 1 }, { "epoch": 0.008039877793857533, "grad_norm": 1.275758147239685, "learning_rate": 0.000106, "loss": 1.7445, "step": 50 }, { "epoch": 0.016079755587715065, "grad_norm": 1.1742221117019653, "learning_rate": 0.000212, "loss": 1.6522, "step": 100 }, { "epoch": 0.0241196333815726, "grad_norm": 0.917005717754364, "learning_rate": 0.00021163795625870702, "loss": 1.6312, "step": 150 }, { "epoch": 0.03215951117543013, "grad_norm": 0.8962546586990356, "learning_rate": 0.00021055429816068858, "loss": 1.5945, "step": 200 }, { "epoch": 0.04019938896928767, "grad_norm": 0.9346632361412048, "learning_rate": 0.00020875642818956903, "loss": 1.5393, "step": 250 }, { "epoch": 0.0482392667631452, "grad_norm": 0.7735784649848938, "learning_rate": 0.00020625662762026727, "loss": 1.5295, "step": 300 }, { "epoch": 0.05627914455700273, "grad_norm": 0.748665988445282, "learning_rate": 0.0002030719726254361, "loss": 1.5226, "step": 350 }, { "epoch": 0.06431902235086026, "grad_norm": 0.8975337743759155, "learning_rate": 0.00019922421762788784, "loss": 1.511, "step": 400 }, { "epoch": 0.0723589001447178, "grad_norm": 0.8698158860206604, "learning_rate": 0.00019473964669582803, "loss": 1.5249, "step": 450 }, { "epoch": 0.08039877793857533, "grad_norm": 0.792420506477356, "learning_rate": 0.00018964889399601773, "loss": 1.488, "step": 500 }, { "epoch": 0.08039877793857533, "eval_loss": 1.5327790975570679, "eval_runtime": 431.164, "eval_samples_per_second": 24.295, "eval_steps_per_second": 6.074, "step": 500 }, { "epoch": 0.08843865573243287, "grad_norm": 0.7909814715385437, "learning_rate": 0.00018398673453135197, "loss": 1.474, "step": 550 }, { "epoch": 0.0964785335262904, "grad_norm": 0.7088127136230469, "learning_rate": 0.00017779184659232858, "loss": 1.483, "step": 600 }, { "epoch": 0.10451841132014793, "grad_norm": 0.7714433670043945, "learning_rate": 0.0001711065475451048, "loss": 1.4852, "step": 650 }, { "epoch": 0.11255828911400546, "grad_norm": 1.0182313919067383, "learning_rate": 0.00016397650476097727, "loss": 1.4443, "step": 700 }, { "epoch": 0.120598166907863, "grad_norm": 0.7473872900009155, "learning_rate": 0.00015645042366192982, "loss": 1.452, "step": 750 }, { "epoch": 0.12863804470172052, "grad_norm": 0.789179265499115, "learning_rate": 0.0001485797150132148, "loss": 1.4872, "step": 800 }, { "epoch": 0.13667792249557806, "grad_norm": 0.8989229202270508, "learning_rate": 0.00014041814373569648, "loss": 1.4354, "step": 850 }, { "epoch": 0.1447178002894356, "grad_norm": 0.7713472843170166, "learning_rate": 0.00013202146163692472, "loss": 1.4508, "step": 900 }, { "epoch": 0.15275767808329313, "grad_norm": 0.7908226847648621, "learning_rate": 0.0001234470265697578, "loss": 1.4426, "step": 950 }, { "epoch": 0.16079755587715067, "grad_norm": 0.792984127998352, "learning_rate": 0.00011475341062006725, "loss": 1.4087, "step": 1000 }, { "epoch": 0.16079755587715067, "eval_loss": 1.4277774095535278, "eval_runtime": 431.2474, "eval_samples_per_second": 24.29, "eval_steps_per_second": 6.073, "step": 1000 }, { "epoch": 0.1688374336710082, "grad_norm": 0.7347446084022522, "learning_rate": 0.000106, "loss": 1.4432, "step": 1050 }, { "epoch": 0.17687731146486574, "grad_norm": 0.8061316013336182, "learning_rate": 9.724658937993278e-05, "loss": 1.4889, "step": 1100 }, { "epoch": 0.18491718925872327, "grad_norm": 0.9965606927871704, "learning_rate": 8.855297343024219e-05, "loss": 1.4394, "step": 1150 }, { "epoch": 0.1929570670525808, "grad_norm": 0.6851484179496765, "learning_rate": 7.99785383630753e-05, "loss": 1.416, "step": 1200 }, { "epoch": 0.20099694484643835, "grad_norm": 0.7076012492179871, "learning_rate": 7.158185626430357e-05, "loss": 1.4108, "step": 1250 }, { "epoch": 0.20903682264029586, "grad_norm": 0.818530797958374, "learning_rate": 6.342028498678525e-05, "loss": 1.4205, "step": 1300 }, { "epoch": 0.2170767004341534, "grad_norm": 0.7673355937004089, "learning_rate": 5.5549576338070204e-05, "loss": 1.4441, "step": 1350 }, { "epoch": 0.22511657822801093, "grad_norm": 0.7837012410163879, "learning_rate": 4.802349523902277e-05, "loss": 1.4046, "step": 1400 }, { "epoch": 0.23315645602186846, "grad_norm": 0.9885613918304443, "learning_rate": 4.0893452454895215e-05, "loss": 1.3865, "step": 1450 }, { "epoch": 0.241196333815726, "grad_norm": 0.6985939145088196, "learning_rate": 3.420815340767147e-05, "loss": 1.3805, "step": 1500 }, { "epoch": 0.241196333815726, "eval_loss": 1.3898102045059204, "eval_runtime": 431.5472, "eval_samples_per_second": 24.273, "eval_steps_per_second": 6.069, "step": 1500 }, { "epoch": 0.24923621160958354, "grad_norm": 0.6956173777580261, "learning_rate": 2.8013265468648052e-05, "loss": 1.398, "step": 1550 }, { "epoch": 0.25727608940344104, "grad_norm": 0.8307960629463196, "learning_rate": 2.2351106003982295e-05, "loss": 1.38, "step": 1600 }, { "epoch": 0.2653159671972986, "grad_norm": 0.6814916729927063, "learning_rate": 1.7260353304171974e-05, "loss": 1.4301, "step": 1650 }, { "epoch": 0.2733558449911561, "grad_norm": 0.8441957235336304, "learning_rate": 1.277578237211217e-05, "loss": 1.422, "step": 1700 }, { "epoch": 0.2813957227850137, "grad_norm": 0.7750621438026428, "learning_rate": 8.928027374563904e-06, "loss": 1.3458, "step": 1750 }, { "epoch": 0.2894356005788712, "grad_norm": 0.7339411973953247, "learning_rate": 5.743372379732728e-06, "loss": 1.3787, "step": 1800 }, { "epoch": 0.29747547837272875, "grad_norm": 0.7997561693191528, "learning_rate": 3.2435718104309803e-06, "loss": 1.3861, "step": 1850 }, { "epoch": 0.30551535616658626, "grad_norm": 0.7548067569732666, "learning_rate": 1.4457018393114339e-06, "loss": 1.3984, "step": 1900 }, { "epoch": 0.3135552339604438, "grad_norm": 0.824309766292572, "learning_rate": 3.620437412929962e-07, "loss": 1.3905, "step": 1950 }, { "epoch": 0.32159511175430133, "grad_norm": 0.7960425019264221, "learning_rate": 0.0, "loss": 1.3743, "step": 2000 }, { "epoch": 0.32159511175430133, "eval_loss": 1.3815141916275024, "eval_runtime": 431.4937, "eval_samples_per_second": 24.276, "eval_steps_per_second": 6.07, "step": 2000 } ], "logging_steps": 50, "max_steps": 2000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.4428946137088e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }