{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 15.0,
  "eval_steps": 500,
  "global_step": 18390,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.8156606851549756,
      "grad_norm": 2.7835583686828613,
      "learning_rate": 0.00019968889417401253,
      "loss": 0.6438,
      "step": 1000
    },
    {
      "epoch": 1.631321370309951,
      "grad_norm": 0.3261709213256836,
      "learning_rate": 0.00019676585418772425,
      "loss": 0.2388,
      "step": 2000
    },
    {
      "epoch": 2.4469820554649266,
      "grad_norm": 2.9635205268859863,
      "learning_rate": 0.00019084912501825553,
      "loss": 0.2032,
      "step": 3000
    },
    {
      "epoch": 3.262642740619902,
      "grad_norm": 2.162414312362671,
      "learning_rate": 0.00018212175520336934,
      "loss": 0.163,
      "step": 4000
    },
    {
      "epoch": 4.078303425774878,
      "grad_norm": 3.4721012115478516,
      "learning_rate": 0.00017085374734710157,
      "loss": 0.1519,
      "step": 5000
    },
    {
      "epoch": 4.893964110929853,
      "grad_norm": 1.8501887321472168,
      "learning_rate": 0.0001573937049265616,
      "loss": 0.13,
      "step": 6000
    },
    {
      "epoch": 5.709624796084829,
      "grad_norm": 3.3048102855682373,
      "learning_rate": 0.00014215804738782126,
      "loss": 0.1186,
      "step": 7000
    },
    {
      "epoch": 6.525285481239804,
      "grad_norm": 2.223738193511963,
      "learning_rate": 0.00012561812718836913,
      "loss": 0.1046,
      "step": 8000
    },
    {
      "epoch": 7.3409461663947795,
      "grad_norm": 0.2893391251564026,
      "learning_rate": 0.00010828564735203954,
      "loss": 0.0957,
      "step": 9000
    },
    {
      "epoch": 8.156606851549755,
      "grad_norm": 0.24986231327056885,
      "learning_rate": 9.069683068014265e-05,
      "loss": 0.0912,
      "step": 10000
    },
    {
      "epoch": 8.97226753670473,
      "grad_norm": 0.6728571653366089,
      "learning_rate": 7.339583038310173e-05,
      "loss": 0.08,
      "step": 11000
    },
    {
      "epoch": 9.787928221859707,
      "grad_norm": 0.3318624794483185,
      "learning_rate": 5.69178953654216e-05,
      "loss": 0.0722,
      "step": 12000
    },
    {
      "epoch": 10.603588907014682,
      "grad_norm": 0.1652187556028366,
      "learning_rate": 4.177281098721372e-05,
      "loss": 0.07,
      "step": 13000
    },
    {
      "epoch": 11.419249592169658,
      "grad_norm": 0.13997943699359894,
      "learning_rate": 2.8429127602959905e-05,
      "loss": 0.0635,
      "step": 14000
    },
    {
      "epoch": 12.234910277324634,
      "grad_norm": 0.17078348994255066,
      "learning_rate": 1.729966480637476e-05,
      "loss": 0.0607,
      "step": 15000
    },
    {
      "epoch": 13.05057096247961,
      "grad_norm": 0.10830472409725189,
      "learning_rate": 8.728739843127509e-06,
      "loss": 0.0599,
      "step": 16000
    },
    {
      "epoch": 13.866231647634583,
      "grad_norm": 0.13888326287269592,
      "learning_rate": 2.9815153118197825e-06,
      "loss": 0.0566,
      "step": 17000
    },
    {
      "epoch": 14.681892332789559,
      "grad_norm": 0.138445645570755,
      "learning_rate": 2.3579570823278885e-07,
      "loss": 0.0556,
      "step": 18000
    }
  ],
  "logging_steps": 1000,
  "max_steps": 18390,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 15,
  "save_steps": 6000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7.5627004461372e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}