{
  "best_metric": 1.3815141916275024,
  "best_model_checkpoint": "miner_id_24/checkpoint-2000",
  "epoch": 0.32159511175430133,
  "eval_steps": 500,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00016079755587715066,
      "eval_loss": 1.9122439622879028,
      "eval_runtime": 431.3676,
      "eval_samples_per_second": 24.283,
      "eval_steps_per_second": 6.071,
      "step": 1
    },
    {
      "epoch": 0.008039877793857533,
      "grad_norm": 1.275758147239685,
      "learning_rate": 0.000106,
      "loss": 1.7445,
      "step": 50
    },
    {
      "epoch": 0.016079755587715065,
      "grad_norm": 1.1742221117019653,
      "learning_rate": 0.000212,
      "loss": 1.6522,
      "step": 100
    },
    {
      "epoch": 0.0241196333815726,
      "grad_norm": 0.917005717754364,
      "learning_rate": 0.00021163795625870702,
      "loss": 1.6312,
      "step": 150
    },
    {
      "epoch": 0.03215951117543013,
      "grad_norm": 0.8962546586990356,
      "learning_rate": 0.00021055429816068858,
      "loss": 1.5945,
      "step": 200
    },
    {
      "epoch": 0.04019938896928767,
      "grad_norm": 0.9346632361412048,
      "learning_rate": 0.00020875642818956903,
      "loss": 1.5393,
      "step": 250
    },
    {
      "epoch": 0.0482392667631452,
      "grad_norm": 0.7735784649848938,
      "learning_rate": 0.00020625662762026727,
      "loss": 1.5295,
      "step": 300
    },
    {
      "epoch": 0.05627914455700273,
      "grad_norm": 0.748665988445282,
      "learning_rate": 0.0002030719726254361,
      "loss": 1.5226,
      "step": 350
    },
    {
      "epoch": 0.06431902235086026,
      "grad_norm": 0.8975337743759155,
      "learning_rate": 0.00019922421762788784,
      "loss": 1.511,
      "step": 400
    },
    {
      "epoch": 0.0723589001447178,
      "grad_norm": 0.8698158860206604,
      "learning_rate": 0.00019473964669582803,
      "loss": 1.5249,
      "step": 450
    },
    {
      "epoch": 0.08039877793857533,
      "grad_norm": 0.792420506477356,
      "learning_rate": 0.00018964889399601773,
      "loss": 1.488,
      "step": 500
    },
    {
      "epoch": 0.08039877793857533,
      "eval_loss": 1.5327790975570679,
      "eval_runtime": 431.164,
      "eval_samples_per_second": 24.295,
      "eval_steps_per_second": 6.074,
      "step": 500
    },
    {
      "epoch": 0.08843865573243287,
      "grad_norm": 0.7909814715385437,
      "learning_rate": 0.00018398673453135197,
      "loss": 1.474,
      "step": 550
    },
    {
      "epoch": 0.0964785335262904,
      "grad_norm": 0.7088127136230469,
      "learning_rate": 0.00017779184659232858,
      "loss": 1.483,
      "step": 600
    },
    {
      "epoch": 0.10451841132014793,
      "grad_norm": 0.7714433670043945,
      "learning_rate": 0.0001711065475451048,
      "loss": 1.4852,
      "step": 650
    },
    {
      "epoch": 0.11255828911400546,
      "grad_norm": 1.0182313919067383,
      "learning_rate": 0.00016397650476097727,
      "loss": 1.4443,
      "step": 700
    },
    {
      "epoch": 0.120598166907863,
      "grad_norm": 0.7473872900009155,
      "learning_rate": 0.00015645042366192982,
      "loss": 1.452,
      "step": 750
    },
    {
      "epoch": 0.12863804470172052,
      "grad_norm": 0.789179265499115,
      "learning_rate": 0.0001485797150132148,
      "loss": 1.4872,
      "step": 800
    },
    {
      "epoch": 0.13667792249557806,
      "grad_norm": 0.8989229202270508,
      "learning_rate": 0.00014041814373569648,
      "loss": 1.4354,
      "step": 850
    },
    {
      "epoch": 0.1447178002894356,
      "grad_norm": 0.7713472843170166,
      "learning_rate": 0.00013202146163692472,
      "loss": 1.4508,
      "step": 900
    },
    {
      "epoch": 0.15275767808329313,
      "grad_norm": 0.7908226847648621,
      "learning_rate": 0.0001234470265697578,
      "loss": 1.4426,
      "step": 950
    },
    {
      "epoch": 0.16079755587715067,
      "grad_norm": 0.792984127998352,
      "learning_rate": 0.00011475341062006725,
      "loss": 1.4087,
      "step": 1000
    },
    {
      "epoch": 0.16079755587715067,
      "eval_loss": 1.4277774095535278,
      "eval_runtime": 431.2474,
      "eval_samples_per_second": 24.29,
      "eval_steps_per_second": 6.073,
      "step": 1000
    },
    {
      "epoch": 0.1688374336710082,
      "grad_norm": 0.7347446084022522,
      "learning_rate": 0.000106,
      "loss": 1.4432,
      "step": 1050
    },
    {
      "epoch": 0.17687731146486574,
      "grad_norm": 0.8061316013336182,
      "learning_rate": 9.724658937993278e-05,
      "loss": 1.4889,
      "step": 1100
    },
    {
      "epoch": 0.18491718925872327,
      "grad_norm": 0.9965606927871704,
      "learning_rate": 8.855297343024219e-05,
      "loss": 1.4394,
      "step": 1150
    },
    {
      "epoch": 0.1929570670525808,
      "grad_norm": 0.6851484179496765,
      "learning_rate": 7.99785383630753e-05,
      "loss": 1.416,
      "step": 1200
    },
    {
      "epoch": 0.20099694484643835,
      "grad_norm": 0.7076012492179871,
      "learning_rate": 7.158185626430357e-05,
      "loss": 1.4108,
      "step": 1250
    },
    {
      "epoch": 0.20903682264029586,
      "grad_norm": 0.818530797958374,
      "learning_rate": 6.342028498678525e-05,
      "loss": 1.4205,
      "step": 1300
    },
    {
      "epoch": 0.2170767004341534,
      "grad_norm": 0.7673355937004089,
      "learning_rate": 5.5549576338070204e-05,
      "loss": 1.4441,
      "step": 1350
    },
    {
      "epoch": 0.22511657822801093,
      "grad_norm": 0.7837012410163879,
      "learning_rate": 4.802349523902277e-05,
      "loss": 1.4046,
      "step": 1400
    },
    {
      "epoch": 0.23315645602186846,
      "grad_norm": 0.9885613918304443,
      "learning_rate": 4.0893452454895215e-05,
      "loss": 1.3865,
      "step": 1450
    },
    {
      "epoch": 0.241196333815726,
      "grad_norm": 0.6985939145088196,
      "learning_rate": 3.420815340767147e-05,
      "loss": 1.3805,
      "step": 1500
    },
    {
      "epoch": 0.241196333815726,
      "eval_loss": 1.3898102045059204,
      "eval_runtime": 431.5472,
      "eval_samples_per_second": 24.273,
      "eval_steps_per_second": 6.069,
      "step": 1500
    },
    {
      "epoch": 0.24923621160958354,
      "grad_norm": 0.6956173777580261,
      "learning_rate": 2.8013265468648052e-05,
      "loss": 1.398,
      "step": 1550
    },
    {
      "epoch": 0.25727608940344104,
      "grad_norm": 0.8307960629463196,
      "learning_rate": 2.2351106003982295e-05,
      "loss": 1.38,
      "step": 1600
    },
    {
      "epoch": 0.2653159671972986,
      "grad_norm": 0.6814916729927063,
      "learning_rate": 1.7260353304171974e-05,
      "loss": 1.4301,
      "step": 1650
    },
    {
      "epoch": 0.2733558449911561,
      "grad_norm": 0.8441957235336304,
      "learning_rate": 1.277578237211217e-05,
      "loss": 1.422,
      "step": 1700
    },
    {
      "epoch": 0.2813957227850137,
      "grad_norm": 0.7750621438026428,
      "learning_rate": 8.928027374563904e-06,
      "loss": 1.3458,
      "step": 1750
    },
    {
      "epoch": 0.2894356005788712,
      "grad_norm": 0.7339411973953247,
      "learning_rate": 5.743372379732728e-06,
      "loss": 1.3787,
      "step": 1800
    },
    {
      "epoch": 0.29747547837272875,
      "grad_norm": 0.7997561693191528,
      "learning_rate": 3.2435718104309803e-06,
      "loss": 1.3861,
      "step": 1850
    },
    {
      "epoch": 0.30551535616658626,
      "grad_norm": 0.7548067569732666,
      "learning_rate": 1.4457018393114339e-06,
      "loss": 1.3984,
      "step": 1900
    },
    {
      "epoch": 0.3135552339604438,
      "grad_norm": 0.824309766292572,
      "learning_rate": 3.620437412929962e-07,
      "loss": 1.3905,
      "step": 1950
    },
    {
      "epoch": 0.32159511175430133,
      "grad_norm": 0.7960425019264221,
      "learning_rate": 0.0,
      "loss": 1.3743,
      "step": 2000
    },
    {
      "epoch": 0.32159511175430133,
      "eval_loss": 1.3815141916275024,
      "eval_runtime": 431.4937,
      "eval_samples_per_second": 24.276,
      "eval_steps_per_second": 6.07,
      "step": 2000
    }
  ],
  "logging_steps": 50,
  "max_steps": 2000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.4428946137088e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}