{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 25.0,
  "global_step": 110,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09090909090909091,
      "grad_norm": 17.91246223449707,
      "learning_rate": 0.0002,
      "loss": 75.6732,
      "step": 1
    },
    {
      "epoch": 0.18181818181818182,
      "grad_norm": 14.767128944396973,
      "learning_rate": 0.00019818181818181821,
      "loss": 72.9423,
      "step": 2
    },
    {
      "epoch": 0.2727272727272727,
      "grad_norm": 18.74183464050293,
      "learning_rate": 0.00019636363636363636,
      "loss": 70.9359,
      "step": 3
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 19.626502990722656,
      "learning_rate": 0.00019454545454545457,
      "loss": 68.6579,
      "step": 4
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 15.683865547180176,
      "learning_rate": 0.00019272727272727274,
      "loss": 65.4299,
      "step": 5
    },
    {
      "epoch": 0.5454545454545454,
      "grad_norm": 17.510343551635742,
      "learning_rate": 0.00019090909090909092,
      "loss": 62.1795,
      "step": 6
    },
    {
      "epoch": 0.6363636363636364,
      "grad_norm": 16.367984771728516,
      "learning_rate": 0.0001890909090909091,
      "loss": 61.7289,
      "step": 7
    },
    {
      "epoch": 0.7272727272727273,
      "grad_norm": 12.617378234863281,
      "learning_rate": 0.00018727272727272728,
      "loss": 60.3031,
      "step": 8
    },
    {
      "epoch": 0.8181818181818182,
      "grad_norm": 8.047079086303711,
      "learning_rate": 0.00018545454545454545,
      "loss": 57.0155,
      "step": 9
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 6.577940940856934,
      "learning_rate": 0.00018363636363636366,
      "loss": 58.1947,
      "step": 10
    },
    {
      "epoch": 1.0,
      "grad_norm": 5.5368194580078125,
      "learning_rate": 0.00018181818181818183,
      "loss": 58.6616,
      "step": 11
    },
    {
      "epoch": 1.0909090909090908,
      "grad_norm": 5.803366661071777,
      "learning_rate": 0.00018,
      "loss": 59.2017,
      "step": 12
    },
    {
      "epoch": 1.1818181818181819,
      "grad_norm": 4.7601318359375,
      "learning_rate": 0.0001781818181818182,
      "loss": 58.422,
      "step": 13
    },
    {
      "epoch": 1.2727272727272727,
      "grad_norm": 4.821050643920898,
      "learning_rate": 0.00017636363636363637,
      "loss": 58.1108,
      "step": 14
    },
    {
      "epoch": 1.3636363636363638,
      "grad_norm": 11.282706260681152,
      "learning_rate": 0.00017454545454545454,
      "loss": 55.6801,
      "step": 15
    },
    {
      "epoch": 1.4545454545454546,
      "grad_norm": 5.136598587036133,
      "learning_rate": 0.00017272727272727275,
      "loss": 57.4943,
      "step": 16
    },
    {
      "epoch": 1.5454545454545454,
      "grad_norm": 5.243523120880127,
      "learning_rate": 0.0001709090909090909,
      "loss": 56.8915,
      "step": 17
    },
    {
      "epoch": 1.6363636363636362,
      "grad_norm": 3.7712388038635254,
      "learning_rate": 0.0001690909090909091,
      "loss": 57.5851,
      "step": 18
    },
    {
      "epoch": 1.7272727272727273,
      "grad_norm": 6.291183948516846,
      "learning_rate": 0.00016727272727272728,
      "loss": 58.549,
      "step": 19
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 7.387523174285889,
      "learning_rate": 0.00016545454545454545,
      "loss": 54.5362,
      "step": 20
    },
    {
      "epoch": 1.9090909090909092,
      "grad_norm": 3.591944932937622,
      "learning_rate": 0.00016363636363636366,
      "loss": 55.1622,
      "step": 21
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.490980625152588,
      "learning_rate": 0.00016181818181818184,
      "loss": 57.005,
      "step": 22
    },
    {
      "epoch": 2.090909090909091,
      "grad_norm": 5.48382043838501,
      "learning_rate": 0.00016,
      "loss": 56.8069,
      "step": 23
    },
    {
      "epoch": 2.1818181818181817,
      "grad_norm": 5.643973350524902,
      "learning_rate": 0.0001581818181818182,
      "loss": 57.1759,
      "step": 24
    },
    {
      "epoch": 2.2727272727272725,
      "grad_norm": 3.086944818496704,
      "learning_rate": 0.00015636363636363637,
      "loss": 56.4412,
      "step": 25
    },
    {
      "epoch": 2.3636363636363638,
      "grad_norm": 3.508883476257324,
      "learning_rate": 0.00015454545454545454,
      "loss": 57.4954,
      "step": 26
    },
    {
      "epoch": 2.4545454545454546,
      "grad_norm": 5.299294471740723,
      "learning_rate": 0.00015272727272727275,
      "loss": 55.2742,
      "step": 27
    },
    {
      "epoch": 2.5454545454545454,
      "grad_norm": 6.717090606689453,
      "learning_rate": 0.0001509090909090909,
      "loss": 55.6025,
      "step": 28
    },
    {
      "epoch": 2.6363636363636362,
      "grad_norm": 4.910857200622559,
      "learning_rate": 0.0001490909090909091,
      "loss": 55.7669,
      "step": 29
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 8.7036771774292,
      "learning_rate": 0.00014727272727272728,
      "loss": 54.4733,
      "step": 30
    },
    {
      "epoch": 2.8181818181818183,
      "grad_norm": 2.9404397010803223,
      "learning_rate": 0.00014545454545454546,
      "loss": 57.289,
      "step": 31
    },
    {
      "epoch": 2.909090909090909,
      "grad_norm": 3.564105272293091,
      "learning_rate": 0.00014363636363636363,
      "loss": 56.9334,
      "step": 32
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.754845380783081,
      "learning_rate": 0.00014181818181818184,
      "loss": 55.0214,
      "step": 33
    },
    {
      "epoch": 3.090909090909091,
      "grad_norm": 2.8574416637420654,
      "learning_rate": 0.00014,
      "loss": 54.8367,
      "step": 34
    },
    {
      "epoch": 3.1818181818181817,
      "grad_norm": 6.656445503234863,
      "learning_rate": 0.0001381818181818182,
      "loss": 57.9869,
      "step": 35
    },
    {
      "epoch": 3.2727272727272725,
      "grad_norm": 3.2680201530456543,
      "learning_rate": 0.00013636363636363637,
      "loss": 55.1834,
      "step": 36
    },
    {
      "epoch": 3.3636363636363638,
      "grad_norm": 2.3929102420806885,
      "learning_rate": 0.00013454545454545455,
      "loss": 54.7497,
      "step": 37
    },
    {
      "epoch": 3.4545454545454546,
      "grad_norm": 3.6357691287994385,
      "learning_rate": 0.00013272727272727275,
      "loss": 55.7179,
      "step": 38
    },
    {
      "epoch": 3.5454545454545454,
      "grad_norm": 2.39304518699646,
      "learning_rate": 0.00013090909090909093,
      "loss": 55.4912,
      "step": 39
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 2.9240729808807373,
      "learning_rate": 0.0001290909090909091,
      "loss": 55.3497,
      "step": 40
    },
    {
      "epoch": 3.7272727272727275,
      "grad_norm": 4.932196617126465,
      "learning_rate": 0.00012727272727272728,
      "loss": 53.4382,
      "step": 41
    },
    {
      "epoch": 3.8181818181818183,
      "grad_norm": 5.664557933807373,
      "learning_rate": 0.00012545454545454546,
      "loss": 57.5408,
      "step": 42
    },
    {
      "epoch": 3.909090909090909,
      "grad_norm": 3.025942802429199,
      "learning_rate": 0.00012363636363636364,
      "loss": 54.9648,
      "step": 43
    },
    {
      "epoch": 4.0,
      "grad_norm": 4.805485248565674,
      "learning_rate": 0.00012181818181818183,
      "loss": 57.8021,
      "step": 44
    },
    {
      "epoch": 4.090909090909091,
      "grad_norm": 2.918771743774414,
      "learning_rate": 0.00012,
      "loss": 56.4665,
      "step": 45
    },
    {
      "epoch": 4.181818181818182,
      "grad_norm": 2.6171820163726807,
      "learning_rate": 0.0001181818181818182,
      "loss": 56.7592,
      "step": 46
    },
    {
      "epoch": 4.2727272727272725,
      "grad_norm": 4.681956768035889,
      "learning_rate": 0.00011636363636363636,
      "loss": 54.7808,
      "step": 47
    },
    {
      "epoch": 4.363636363636363,
      "grad_norm": 2.4477176666259766,
      "learning_rate": 0.00011454545454545456,
      "loss": 56.3999,
      "step": 48
    },
    {
      "epoch": 4.454545454545454,
      "grad_norm": 3.94838809967041,
      "learning_rate": 0.00011272727272727272,
      "loss": 55.2243,
      "step": 49
    },
    {
      "epoch": 4.545454545454545,
      "grad_norm": 4.973086833953857,
      "learning_rate": 0.00011090909090909092,
      "loss": 54.8671,
      "step": 50
    },
    {
      "epoch": 4.636363636363637,
      "grad_norm": 4.2765350341796875,
      "learning_rate": 0.00010909090909090909,
      "loss": 54.5658,
      "step": 51
    },
    {
      "epoch": 4.7272727272727275,
      "grad_norm": 3.0201237201690674,
      "learning_rate": 0.00010727272727272728,
      "loss": 55.6538,
      "step": 52
    },
    {
      "epoch": 4.818181818181818,
      "grad_norm": 3.2620699405670166,
      "learning_rate": 0.00010545454545454545,
      "loss": 54.5381,
      "step": 53
    },
    {
      "epoch": 4.909090909090909,
      "grad_norm": 5.3430304527282715,
      "learning_rate": 0.00010363636363636364,
      "loss": 56.9296,
      "step": 54
    },
    {
      "epoch": 5.0,
      "grad_norm": 3.56426739692688,
      "learning_rate": 0.00010181818181818181,
      "loss": 53.8241,
      "step": 55
    },
    {
      "epoch": 5.090909090909091,
      "grad_norm": 2.9919991493225098,
      "learning_rate": 0.0001,
      "loss": 53.9321,
      "step": 56
    },
    {
      "epoch": 5.181818181818182,
      "grad_norm": 6.316122531890869,
      "learning_rate": 9.818181818181818e-05,
      "loss": 56.8231,
      "step": 57
    },
    {
      "epoch": 5.2727272727272725,
      "grad_norm": 2.5290544033050537,
      "learning_rate": 9.636363636363637e-05,
      "loss": 53.5092,
      "step": 58
    },
    {
      "epoch": 5.363636363636363,
      "grad_norm": 3.845237970352173,
      "learning_rate": 9.454545454545455e-05,
      "loss": 54.9354,
      "step": 59
    },
    {
      "epoch": 5.454545454545454,
      "grad_norm": 4.01813268661499,
      "learning_rate": 9.272727272727273e-05,
      "loss": 55.5299,
      "step": 60
    },
    {
      "epoch": 5.545454545454545,
      "grad_norm": 5.104351997375488,
      "learning_rate": 9.090909090909092e-05,
      "loss": 56.2703,
      "step": 61
    },
    {
      "epoch": 5.636363636363637,
      "grad_norm": 2.805305242538452,
      "learning_rate": 8.90909090909091e-05,
      "loss": 55.2864,
      "step": 62
    },
    {
      "epoch": 5.7272727272727275,
      "grad_norm": 3.206454038619995,
      "learning_rate": 8.727272727272727e-05,
      "loss": 55.5734,
      "step": 63
    },
    {
      "epoch": 5.818181818181818,
      "grad_norm": 4.204082489013672,
      "learning_rate": 8.545454545454545e-05,
      "loss": 57.2362,
      "step": 64
    },
    {
      "epoch": 5.909090909090909,
      "grad_norm": 5.345448017120361,
      "learning_rate": 8.363636363636364e-05,
      "loss": 54.0839,
      "step": 65
    },
    {
      "epoch": 6.0,
      "grad_norm": 4.987890243530273,
      "learning_rate": 8.181818181818183e-05,
      "loss": 54.8737,
      "step": 66
    },
    {
      "epoch": 6.090909090909091,
      "grad_norm": 2.3609702587127686,
      "learning_rate": 8e-05,
      "loss": 56.0953,
      "step": 67
    },
    {
      "epoch": 6.181818181818182,
      "grad_norm": 11.55007553100586,
      "learning_rate": 7.818181818181818e-05,
      "loss": 51.6635,
      "step": 68
    },
    {
      "epoch": 6.2727272727272725,
      "grad_norm": 4.0023088455200195,
      "learning_rate": 7.636363636363637e-05,
      "loss": 55.1361,
      "step": 69
    },
    {
      "epoch": 6.363636363636363,
      "grad_norm": 5.44865608215332,
      "learning_rate": 7.454545454545455e-05,
      "loss": 54.295,
      "step": 70
    },
    {
      "epoch": 6.454545454545454,
      "grad_norm": 3.5155789852142334,
      "learning_rate": 7.272727272727273e-05,
      "loss": 55.0041,
      "step": 71
    },
    {
      "epoch": 6.545454545454545,
      "grad_norm": 2.4959452152252197,
      "learning_rate": 7.090909090909092e-05,
      "loss": 55.3521,
      "step": 72
    },
    {
      "epoch": 6.636363636363637,
      "grad_norm": 2.609388589859009,
      "learning_rate": 6.90909090909091e-05,
      "loss": 55.9501,
      "step": 73
    },
    {
      "epoch": 6.7272727272727275,
      "grad_norm": 3.001426935195923,
      "learning_rate": 6.727272727272727e-05,
      "loss": 54.3898,
      "step": 74
    },
    {
      "epoch": 6.818181818181818,
      "grad_norm": 4.63818359375,
      "learning_rate": 6.545454545454546e-05,
      "loss": 56.5414,
      "step": 75
    },
    {
      "epoch": 6.909090909090909,
      "grad_norm": 3.8249905109405518,
      "learning_rate": 6.363636363636364e-05,
      "loss": 55.0525,
      "step": 76
    },
    {
      "epoch": 7.0,
      "grad_norm": 4.463079929351807,
      "learning_rate": 6.181818181818182e-05,
      "loss": 56.1926,
      "step": 77
    },
    {
      "epoch": 7.090909090909091,
      "grad_norm": 3.2296130657196045,
      "learning_rate": 6e-05,
      "loss": 55.4546,
      "step": 78
    },
    {
      "epoch": 7.181818181818182,
      "grad_norm": 3.3750839233398438,
      "learning_rate": 5.818181818181818e-05,
      "loss": 55.3764,
      "step": 79
    },
    {
      "epoch": 7.2727272727272725,
      "grad_norm": 2.929070234298706,
      "learning_rate": 5.636363636363636e-05,
      "loss": 54.0089,
      "step": 80
    },
    {
      "epoch": 7.363636363636363,
      "grad_norm": 4.522458076477051,
      "learning_rate": 5.4545454545454546e-05,
      "loss": 52.8423,
      "step": 81
    },
    {
      "epoch": 7.454545454545454,
      "grad_norm": 3.412754774093628,
      "learning_rate": 5.272727272727272e-05,
      "loss": 54.2377,
      "step": 82
    },
    {
      "epoch": 7.545454545454545,
      "grad_norm": 3.1856706142425537,
      "learning_rate": 5.090909090909091e-05,
      "loss": 56.1019,
      "step": 83
    },
    {
      "epoch": 7.636363636363637,
      "grad_norm": 2.810863494873047,
      "learning_rate": 4.909090909090909e-05,
      "loss": 55.3778,
      "step": 84
    },
    {
      "epoch": 7.7272727272727275,
      "grad_norm": 3.276930809020996,
      "learning_rate": 4.7272727272727275e-05,
      "loss": 55.1294,
      "step": 85
    },
    {
      "epoch": 7.818181818181818,
      "grad_norm": 5.239167213439941,
      "learning_rate": 4.545454545454546e-05,
      "loss": 53.4743,
      "step": 86
    },
    {
      "epoch": 7.909090909090909,
      "grad_norm": 2.900198221206665,
      "learning_rate": 4.3636363636363636e-05,
      "loss": 55.4292,
      "step": 87
    },
    {
      "epoch": 8.0,
      "grad_norm": 9.296468734741211,
      "learning_rate": 4.181818181818182e-05,
      "loss": 50.4517,
      "step": 88
    },
    {
      "epoch": 8.090909090909092,
      "grad_norm": 5.20082426071167,
      "learning_rate": 4e-05,
      "loss": 52.4611,
      "step": 89
    },
    {
      "epoch": 8.181818181818182,
      "grad_norm": 2.87880277633667,
      "learning_rate": 3.818181818181819e-05,
      "loss": 54.9065,
      "step": 90
    },
    {
      "epoch": 8.272727272727273,
      "grad_norm": 5.661789894104004,
      "learning_rate": 3.6363636363636364e-05,
      "loss": 51.4769,
      "step": 91
    },
    {
      "epoch": 8.363636363636363,
      "grad_norm": 3.0113143920898438,
      "learning_rate": 3.454545454545455e-05,
      "loss": 55.6557,
      "step": 92
    },
    {
      "epoch": 8.454545454545455,
      "grad_norm": 3.002990961074829,
      "learning_rate": 3.272727272727273e-05,
      "loss": 54.1278,
      "step": 93
    },
    {
      "epoch": 8.545454545454545,
      "grad_norm": 3.011186361312866,
      "learning_rate": 3.090909090909091e-05,
      "loss": 56.3365,
      "step": 94
    },
    {
      "epoch": 8.636363636363637,
      "grad_norm": 3.185494899749756,
      "learning_rate": 2.909090909090909e-05,
      "loss": 56.082,
      "step": 95
    },
    {
      "epoch": 8.727272727272727,
      "grad_norm": 3.2342071533203125,
      "learning_rate": 2.7272727272727273e-05,
      "loss": 55.6653,
      "step": 96
    },
    {
      "epoch": 8.818181818181818,
      "grad_norm": 3.0022008419036865,
      "learning_rate": 2.5454545454545454e-05,
      "loss": 52.6479,
      "step": 97
    },
    {
      "epoch": 8.909090909090908,
      "grad_norm": 6.0146164894104,
      "learning_rate": 2.3636363636363637e-05,
      "loss": 53.5091,
      "step": 98
    },
    {
      "epoch": 9.0,
      "grad_norm": 4.083887577056885,
      "learning_rate": 2.1818181818181818e-05,
      "loss": 51.2912,
      "step": 99
    },
    {
      "epoch": 9.090909090909092,
      "grad_norm": 4.559575080871582,
      "learning_rate": 2e-05,
      "loss": 52.5935,
      "step": 100
    },
    {
      "epoch": 9.181818181818182,
      "grad_norm": 3.2446377277374268,
      "learning_rate": 1.8181818181818182e-05,
      "loss": 54.5666,
      "step": 101
    },
    {
      "epoch": 9.272727272727273,
      "grad_norm": 3.780581474304199,
      "learning_rate": 1.6363636363636366e-05,
      "loss": 52.7207,
      "step": 102
    },
    {
      "epoch": 9.363636363636363,
      "grad_norm": 3.1265249252319336,
      "learning_rate": 1.4545454545454545e-05,
      "loss": 54.8719,
      "step": 103
    },
    {
      "epoch": 9.454545454545455,
      "grad_norm": 3.441582679748535,
      "learning_rate": 1.2727272727272727e-05,
      "loss": 54.9213,
      "step": 104
    },
    {
      "epoch": 9.545454545454545,
      "grad_norm": 2.735200881958008,
      "learning_rate": 1.0909090909090909e-05,
      "loss": 54.696,
      "step": 105
    },
    {
      "epoch": 9.636363636363637,
      "grad_norm": 3.8126707077026367,
      "learning_rate": 9.090909090909091e-06,
      "loss": 52.9369,
      "step": 106
    },
    {
      "epoch": 9.727272727272727,
      "grad_norm": 2.936985969543457,
      "learning_rate": 7.272727272727272e-06,
      "loss": 53.189,
      "step": 107
    },
    {
      "epoch": 9.818181818181818,
      "grad_norm": 3.6623241901397705,
      "learning_rate": 5.4545454545454545e-06,
      "loss": 53.3994,
      "step": 108
    },
    {
      "epoch": 9.909090909090908,
      "grad_norm": 3.0908303260803223,
      "learning_rate": 3.636363636363636e-06,
      "loss": 54.5745,
      "step": 109
    },
    {
      "epoch": 10.0,
      "grad_norm": 2.79154634475708,
      "learning_rate": 1.818181818181818e-06,
      "loss": 53.8217,
      "step": 110
    },
    {
      "epoch": 10.0,
      "step": 110,
      "total_flos": 11897760042000.0,
      "train_loss": 56.17975682345303,
      "train_runtime": 404.5331,
      "train_samples_per_second": 4.326,
      "train_steps_per_second": 0.272
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 110,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 11897760042000.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}