{
  "best_metric": 1.3252594470977783,
  "best_model_checkpoint": "4bit_repro_03022025/host17_seed_42_full_det_fp16_no_flash_attn_fix_pad_gemma-2-9b-it-l16-cot-wt-4ep-lr3e04-ws20-bs4-ga4-fp16-13022025/checkpoint-109",
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 218,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.009195402298850575,
      "grad_norm": 1.025204062461853,
      "learning_rate": 1.4999999999999999e-05,
      "loss": 2.395,
      "step": 1
    },
    {
      "epoch": 0.01839080459770115,
      "grad_norm": 0.7824286222457886,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 2.3972,
      "step": 2
    },
    {
      "epoch": 0.027586206896551724,
      "grad_norm": 0.9325972199440002,
      "learning_rate": 4.4999999999999996e-05,
      "loss": 2.2652,
      "step": 3
    },
    {
      "epoch": 0.0367816091954023,
      "grad_norm": 0.7933842539787292,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 2.1491,
      "step": 4
    },
    {
      "epoch": 0.04597701149425287,
      "grad_norm": 0.9390926957130432,
      "learning_rate": 7.5e-05,
      "loss": 2.2175,
      "step": 5
    },
    {
      "epoch": 0.05517241379310345,
      "grad_norm": 0.8701347708702087,
      "learning_rate": 8.999999999999999e-05,
      "loss": 2.0785,
      "step": 6
    },
    {
      "epoch": 0.06436781609195402,
      "grad_norm": 0.48448503017425537,
      "learning_rate": 0.00010499999999999999,
      "loss": 2.0785,
      "step": 7
    },
    {
      "epoch": 0.0735632183908046,
      "grad_norm": 0.39611828327178955,
      "learning_rate": 0.00011999999999999999,
      "loss": 2.0303,
      "step": 8
    },
    {
      "epoch": 0.08275862068965517,
      "grad_norm": 0.5025896430015564,
      "learning_rate": 0.000135,
      "loss": 1.917,
      "step": 9
    },
    {
      "epoch": 0.09195402298850575,
      "grad_norm": 0.6268681883811951,
      "learning_rate": 0.00015,
      "loss": 1.7051,
      "step": 10
    },
    {
      "epoch": 0.10114942528735632,
      "grad_norm": 0.6085858941078186,
      "learning_rate": 0.000165,
      "loss": 1.729,
      "step": 11
    },
    {
      "epoch": 0.1103448275862069,
      "grad_norm": 0.5290607213973999,
      "learning_rate": 0.00017999999999999998,
      "loss": 1.715,
      "step": 12
    },
    {
      "epoch": 0.11954022988505747,
      "grad_norm": 0.657960832118988,
      "learning_rate": 0.000195,
      "loss": 1.5356,
      "step": 13
    },
    {
      "epoch": 0.12873563218390804,
      "grad_norm": 0.4407201409339905,
      "learning_rate": 0.00020999999999999998,
      "loss": 1.6903,
      "step": 14
    },
    {
      "epoch": 0.13793103448275862,
      "grad_norm": 0.3601807951927185,
      "learning_rate": 0.000225,
      "loss": 1.6744,
      "step": 15
    },
    {
      "epoch": 0.1471264367816092,
      "grad_norm": 0.3802438974380493,
      "learning_rate": 0.00023999999999999998,
      "loss": 1.8822,
      "step": 16
    },
    {
      "epoch": 0.15632183908045977,
      "grad_norm": 0.4443354904651642,
      "learning_rate": 0.00025499999999999996,
      "loss": 1.3503,
      "step": 17
    },
    {
      "epoch": 0.16551724137931034,
      "grad_norm": 0.5189216136932373,
      "learning_rate": 0.00027,
      "loss": 1.3499,
      "step": 18
    },
    {
      "epoch": 0.17471264367816092,
      "grad_norm": 0.3960488438606262,
      "learning_rate": 0.000285,
      "loss": 1.3832,
      "step": 19
    },
    {
      "epoch": 0.1839080459770115,
      "grad_norm": 0.37185606360435486,
      "learning_rate": 0.0003,
      "loss": 1.575,
      "step": 20
    },
    {
      "epoch": 0.19310344827586207,
      "grad_norm": 0.28029191493988037,
      "learning_rate": 0.00029927184466019415,
      "loss": 1.6438,
      "step": 21
    },
    {
      "epoch": 0.20229885057471264,
      "grad_norm": 0.2731279134750366,
      "learning_rate": 0.00029854368932038833,
      "loss": 1.5843,
      "step": 22
    },
    {
      "epoch": 0.21149425287356322,
      "grad_norm": 0.35780686140060425,
      "learning_rate": 0.0002978155339805825,
      "loss": 1.3945,
      "step": 23
    },
    {
      "epoch": 0.2206896551724138,
      "grad_norm": 0.35450395941734314,
      "learning_rate": 0.0002970873786407767,
      "loss": 1.4894,
      "step": 24
    },
    {
      "epoch": 0.22988505747126436,
      "grad_norm": 0.3032964766025543,
      "learning_rate": 0.00029635922330097087,
      "loss": 1.64,
      "step": 25
    },
    {
      "epoch": 0.23908045977011494,
      "grad_norm": 0.3555232584476471,
      "learning_rate": 0.00029563106796116505,
      "loss": 1.4793,
      "step": 26
    },
    {
      "epoch": 0.2482758620689655,
      "grad_norm": 0.43719008564949036,
      "learning_rate": 0.0002949029126213592,
      "loss": 1.4318,
      "step": 27
    },
    {
      "epoch": 0.2574712643678161,
      "grad_norm": 0.3937687277793884,
      "learning_rate": 0.00029417475728155335,
      "loss": 1.3755,
      "step": 28
    },
    {
      "epoch": 0.26666666666666666,
      "grad_norm": 0.3995443880558014,
      "learning_rate": 0.00029344660194174753,
      "loss": 1.6313,
      "step": 29
    },
    {
      "epoch": 0.27586206896551724,
      "grad_norm": 0.33234909176826477,
      "learning_rate": 0.0002927184466019417,
      "loss": 1.7548,
      "step": 30
    },
    {
      "epoch": 0.2850574712643678,
      "grad_norm": 0.3954809010028839,
      "learning_rate": 0.0002919902912621359,
      "loss": 1.5549,
      "step": 31
    },
    {
      "epoch": 0.2942528735632184,
      "grad_norm": 0.3647831976413727,
      "learning_rate": 0.00029126213592233006,
      "loss": 1.3264,
      "step": 32
    },
    {
      "epoch": 0.30344827586206896,
      "grad_norm": 0.4714711308479309,
      "learning_rate": 0.00029053398058252424,
      "loss": 1.2362,
      "step": 33
    },
    {
      "epoch": 0.31264367816091954,
      "grad_norm": 0.4638761878013611,
      "learning_rate": 0.0002898058252427184,
      "loss": 1.5707,
      "step": 34
    },
    {
      "epoch": 0.3218390804597701,
      "grad_norm": 0.43770870566368103,
      "learning_rate": 0.0002890776699029126,
      "loss": 1.5975,
      "step": 35
    },
    {
      "epoch": 0.3310344827586207,
      "grad_norm": 0.46125656366348267,
      "learning_rate": 0.0002883495145631068,
      "loss": 1.4532,
      "step": 36
    },
    {
      "epoch": 0.34022988505747126,
      "grad_norm": 0.3735737204551697,
      "learning_rate": 0.00028762135922330096,
      "loss": 1.4564,
      "step": 37
    },
    {
      "epoch": 0.34942528735632183,
      "grad_norm": 0.35823461413383484,
      "learning_rate": 0.00028689320388349513,
      "loss": 1.5855,
      "step": 38
    },
    {
      "epoch": 0.3586206896551724,
      "grad_norm": 0.5508543252944946,
      "learning_rate": 0.0002861650485436893,
      "loss": 1.306,
      "step": 39
    },
    {
      "epoch": 0.367816091954023,
      "grad_norm": 0.4099932014942169,
      "learning_rate": 0.0002854368932038835,
      "loss": 1.5942,
      "step": 40
    },
    {
      "epoch": 0.37701149425287356,
      "grad_norm": 0.3676886558532715,
      "learning_rate": 0.00028470873786407767,
      "loss": 1.3708,
      "step": 41
    },
    {
      "epoch": 0.38620689655172413,
      "grad_norm": 0.6290714740753174,
      "learning_rate": 0.00028398058252427185,
      "loss": 1.2496,
      "step": 42
    },
    {
      "epoch": 0.3954022988505747,
      "grad_norm": 0.3946329951286316,
      "learning_rate": 0.00028325242718446603,
      "loss": 1.3344,
      "step": 43
    },
    {
      "epoch": 0.4045977011494253,
      "grad_norm": 0.4511699080467224,
      "learning_rate": 0.00028252427184466015,
      "loss": 1.356,
      "step": 44
    },
    {
      "epoch": 0.41379310344827586,
      "grad_norm": 0.5036881566047668,
      "learning_rate": 0.00028179611650485433,
      "loss": 1.2171,
      "step": 45
    },
    {
      "epoch": 0.42298850574712643,
      "grad_norm": 0.4095934331417084,
      "learning_rate": 0.0002810679611650485,
      "loss": 1.4812,
      "step": 46
    },
    {
      "epoch": 0.432183908045977,
      "grad_norm": 0.47633135318756104,
      "learning_rate": 0.0002803398058252427,
      "loss": 1.3561,
      "step": 47
    },
    {
      "epoch": 0.4413793103448276,
      "grad_norm": 0.4468563199043274,
      "learning_rate": 0.00027961165048543687,
      "loss": 1.2434,
      "step": 48
    },
    {
      "epoch": 0.45057471264367815,
      "grad_norm": 0.48372191190719604,
      "learning_rate": 0.00027888349514563105,
      "loss": 1.2266,
      "step": 49
    },
    {
      "epoch": 0.45977011494252873,
      "grad_norm": 0.5756326913833618,
      "learning_rate": 0.0002781553398058252,
      "loss": 1.1512,
      "step": 50
    },
    {
      "epoch": 0.4689655172413793,
      "grad_norm": 0.4629153907299042,
      "learning_rate": 0.0002774271844660194,
      "loss": 1.3474,
      "step": 51
    },
    {
      "epoch": 0.4781609195402299,
      "grad_norm": 0.42864587903022766,
      "learning_rate": 0.0002766990291262136,
      "loss": 1.1593,
      "step": 52
    },
    {
      "epoch": 0.48735632183908045,
      "grad_norm": 0.5796183943748474,
      "learning_rate": 0.00027597087378640776,
      "loss": 1.2137,
      "step": 53
    },
    {
      "epoch": 0.496551724137931,
      "grad_norm": 0.5870793461799622,
      "learning_rate": 0.00027524271844660194,
      "loss": 1.1082,
      "step": 54
    },
    {
      "epoch": 0.5057471264367817,
      "grad_norm": 0.4859938323497772,
      "learning_rate": 0.0002745145631067961,
      "loss": 1.3229,
      "step": 55
    },
    {
      "epoch": 0.5149425287356322,
      "grad_norm": 0.5698845386505127,
      "learning_rate": 0.0002737864077669903,
      "loss": 1.3068,
      "step": 56
    },
    {
      "epoch": 0.5241379310344828,
      "grad_norm": 0.5284724831581116,
      "learning_rate": 0.0002730582524271845,
      "loss": 1.0949,
      "step": 57
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 0.5468711256980896,
      "learning_rate": 0.00027233009708737865,
      "loss": 1.2197,
      "step": 58
    },
    {
      "epoch": 0.542528735632184,
      "grad_norm": 0.6027315258979797,
      "learning_rate": 0.0002716019417475728,
      "loss": 1.1874,
      "step": 59
    },
    {
      "epoch": 0.5517241379310345,
      "grad_norm": 0.5445360541343689,
      "learning_rate": 0.00027087378640776696,
      "loss": 1.2057,
      "step": 60
    },
    {
      "epoch": 0.5609195402298851,
      "grad_norm": 0.591551661491394,
      "learning_rate": 0.00027014563106796114,
      "loss": 1.1251,
      "step": 61
    },
    {
      "epoch": 0.5701149425287356,
      "grad_norm": 0.528071939945221,
      "learning_rate": 0.0002694174757281553,
      "loss": 1.0482,
      "step": 62
    },
    {
      "epoch": 0.5793103448275863,
      "grad_norm": 0.691935658454895,
      "learning_rate": 0.0002686893203883495,
      "loss": 1.0581,
      "step": 63
    },
    {
      "epoch": 0.5885057471264368,
      "grad_norm": 0.776759684085846,
      "learning_rate": 0.00026796116504854367,
      "loss": 1.1077,
      "step": 64
    },
    {
      "epoch": 0.5977011494252874,
      "grad_norm": 0.8228328227996826,
      "learning_rate": 0.00026723300970873785,
      "loss": 1.2629,
      "step": 65
    },
    {
      "epoch": 0.6068965517241379,
      "grad_norm": 0.5646819472312927,
      "learning_rate": 0.00026650485436893203,
      "loss": 0.9204,
      "step": 66
    },
    {
      "epoch": 0.6160919540229886,
      "grad_norm": 0.6202297806739807,
      "learning_rate": 0.0002657766990291262,
      "loss": 1.1396,
      "step": 67
    },
    {
      "epoch": 0.6252873563218391,
      "grad_norm": 0.6260644197463989,
      "learning_rate": 0.0002650485436893204,
      "loss": 1.0977,
      "step": 68
    },
    {
      "epoch": 0.6344827586206897,
      "grad_norm": 0.669505774974823,
      "learning_rate": 0.00026432038834951456,
      "loss": 1.2014,
      "step": 69
    },
    {
      "epoch": 0.6436781609195402,
      "grad_norm": 0.7686023712158203,
      "learning_rate": 0.00026359223300970874,
      "loss": 1.1332,
      "step": 70
    },
    {
      "epoch": 0.6528735632183909,
      "grad_norm": 0.7180910110473633,
      "learning_rate": 0.0002628640776699029,
      "loss": 0.88,
      "step": 71
    },
    {
      "epoch": 0.6620689655172414,
      "grad_norm": 0.6693065166473389,
      "learning_rate": 0.00026213592233009705,
      "loss": 0.9068,
      "step": 72
    },
    {
      "epoch": 0.671264367816092,
      "grad_norm": 0.6618425250053406,
      "learning_rate": 0.0002614077669902912,
      "loss": 0.9885,
      "step": 73
    },
    {
      "epoch": 0.6804597701149425,
      "grad_norm": 0.7131378054618835,
      "learning_rate": 0.0002606796116504854,
      "loss": 1.0587,
      "step": 74
    },
    {
      "epoch": 0.6896551724137931,
      "grad_norm": 0.9193438291549683,
      "learning_rate": 0.0002599514563106796,
      "loss": 1.1504,
      "step": 75
    },
    {
      "epoch": 0.6988505747126437,
      "grad_norm": 1.1682260036468506,
      "learning_rate": 0.00025922330097087376,
      "loss": 1.1793,
      "step": 76
    },
    {
      "epoch": 0.7080459770114943,
      "grad_norm": 0.6184092164039612,
      "learning_rate": 0.00025849514563106794,
      "loss": 1.2133,
      "step": 77
    },
    {
      "epoch": 0.7172413793103448,
      "grad_norm": 0.7343618273735046,
      "learning_rate": 0.0002577669902912621,
      "loss": 0.9616,
      "step": 78
    },
    {
      "epoch": 0.7264367816091954,
      "grad_norm": 0.8535470366477966,
      "learning_rate": 0.0002570388349514563,
      "loss": 1.2857,
      "step": 79
    },
    {
      "epoch": 0.735632183908046,
      "grad_norm": 0.6457574367523193,
      "learning_rate": 0.0002563106796116505,
      "loss": 1.0748,
      "step": 80
    },
    {
      "epoch": 0.7448275862068966,
      "grad_norm": 0.5693302154541016,
      "learning_rate": 0.0002555825242718446,
      "loss": 1.0785,
      "step": 81
    },
    {
      "epoch": 0.7540229885057471,
      "grad_norm": 0.6433013081550598,
      "learning_rate": 0.0002548543689320388,
      "loss": 0.9738,
      "step": 82
    },
    {
      "epoch": 0.7632183908045977,
      "grad_norm": 1.2133727073669434,
      "learning_rate": 0.00025412621359223296,
      "loss": 1.2062,
      "step": 83
    },
    {
      "epoch": 0.7724137931034483,
      "grad_norm": 0.7277675271034241,
      "learning_rate": 0.00025339805825242714,
      "loss": 1.1494,
      "step": 84
    },
    {
      "epoch": 0.7816091954022989,
      "grad_norm": 0.6444184184074402,
      "learning_rate": 0.0002526699029126213,
      "loss": 1.1214,
      "step": 85
    },
    {
      "epoch": 0.7908045977011494,
      "grad_norm": 0.8243492841720581,
      "learning_rate": 0.0002519417475728155,
      "loss": 0.8145,
      "step": 86
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.6770063042640686,
      "learning_rate": 0.00025121359223300967,
      "loss": 0.9134,
      "step": 87
    },
    {
      "epoch": 0.8091954022988506,
      "grad_norm": 0.6134109497070312,
      "learning_rate": 0.00025048543689320385,
      "loss": 1.0109,
      "step": 88
    },
    {
      "epoch": 0.8183908045977012,
      "grad_norm": 0.5844547748565674,
      "learning_rate": 0.00024975728155339803,
      "loss": 1.063,
      "step": 89
    },
    {
      "epoch": 0.8275862068965517,
      "grad_norm": 0.5940524339675903,
      "learning_rate": 0.0002490291262135922,
      "loss": 0.9952,
      "step": 90
    },
    {
      "epoch": 0.8367816091954023,
      "grad_norm": 0.7235853672027588,
      "learning_rate": 0.0002483009708737864,
      "loss": 0.895,
      "step": 91
    },
    {
      "epoch": 0.8459770114942529,
      "grad_norm": 0.7243452668190002,
      "learning_rate": 0.00024757281553398056,
      "loss": 0.7441,
      "step": 92
    },
    {
      "epoch": 0.8551724137931035,
      "grad_norm": 0.6366357207298279,
      "learning_rate": 0.00024684466019417474,
      "loss": 1.0253,
      "step": 93
    },
    {
      "epoch": 0.864367816091954,
      "grad_norm": 0.9579809308052063,
      "learning_rate": 0.0002461165048543689,
      "loss": 0.8995,
      "step": 94
    },
    {
      "epoch": 0.8735632183908046,
      "grad_norm": 0.8137032985687256,
      "learning_rate": 0.0002453883495145631,
      "loss": 0.8475,
      "step": 95
    },
    {
      "epoch": 0.8827586206896552,
      "grad_norm": 0.5339512825012207,
      "learning_rate": 0.0002446601941747572,
      "loss": 0.8167,
      "step": 96
    },
    {
      "epoch": 0.8919540229885058,
      "grad_norm": 0.6556524038314819,
      "learning_rate": 0.00024393203883495143,
      "loss": 1.0225,
      "step": 97
    },
    {
      "epoch": 0.9011494252873563,
      "grad_norm": 0.6119419932365417,
      "learning_rate": 0.0002432038834951456,
      "loss": 1.0889,
      "step": 98
    },
    {
      "epoch": 0.9103448275862069,
      "grad_norm": 0.7066159248352051,
      "learning_rate": 0.0002424757281553398,
      "loss": 0.8548,
      "step": 99
    },
    {
      "epoch": 0.9195402298850575,
      "grad_norm": 0.5464254021644592,
      "learning_rate": 0.00024174757281553394,
      "loss": 0.9283,
      "step": 100
    },
    {
      "epoch": 0.9287356321839081,
      "grad_norm": 0.825078010559082,
      "learning_rate": 0.00024101941747572812,
      "loss": 0.8686,
      "step": 101
    },
    {
      "epoch": 0.9379310344827586,
      "grad_norm": 1.2080026865005493,
      "learning_rate": 0.0002402912621359223,
      "loss": 0.8503,
      "step": 102
    },
    {
      "epoch": 0.9471264367816092,
      "grad_norm": 0.6597005128860474,
      "learning_rate": 0.00023956310679611648,
      "loss": 0.9614,
      "step": 103
    },
    {
      "epoch": 0.9563218390804598,
      "grad_norm": 0.614787757396698,
      "learning_rate": 0.00023883495145631065,
      "loss": 0.9684,
      "step": 104
    },
    {
      "epoch": 0.9655172413793104,
      "grad_norm": 0.6293591856956482,
      "learning_rate": 0.00023810679611650483,
      "loss": 0.7772,
      "step": 105
    },
    {
      "epoch": 0.9747126436781609,
      "grad_norm": 0.5669013857841492,
      "learning_rate": 0.000237378640776699,
      "loss": 1.1319,
      "step": 106
    },
    {
      "epoch": 0.9839080459770115,
      "grad_norm": 0.6458181738853455,
      "learning_rate": 0.0002366504854368932,
      "loss": 0.9616,
      "step": 107
    },
    {
      "epoch": 0.993103448275862,
      "grad_norm": 0.5852652192115784,
      "learning_rate": 0.00023592233009708734,
      "loss": 0.746,
      "step": 108
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.8498281836509705,
      "learning_rate": 0.00023519417475728152,
      "loss": 0.8678,
      "step": 109
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.3252594470977783,
      "eval_runtime": 94.0095,
      "eval_samples_per_second": 3.532,
      "eval_steps_per_second": 1.766,
      "step": 109
    },
    {
      "epoch": 1.0091954022988505,
      "grad_norm": 0.6115343570709229,
      "learning_rate": 0.0002344660194174757,
      "loss": 0.811,
      "step": 110
    },
    {
      "epoch": 1.018390804597701,
      "grad_norm": 0.5486385226249695,
      "learning_rate": 0.00023373786407766988,
      "loss": 0.6873,
      "step": 111
    },
    {
      "epoch": 1.0275862068965518,
      "grad_norm": 0.6083724498748779,
      "learning_rate": 0.00023300970873786406,
      "loss": 0.75,
      "step": 112
    },
    {
      "epoch": 1.0367816091954023,
      "grad_norm": 0.7998746633529663,
      "learning_rate": 0.00023228155339805823,
      "loss": 0.5876,
      "step": 113
    },
    {
      "epoch": 1.0459770114942528,
      "grad_norm": 0.6601845026016235,
      "learning_rate": 0.0002315533980582524,
      "loss": 0.8747,
      "step": 114
    },
    {
      "epoch": 1.0551724137931036,
      "grad_norm": 0.9844085574150085,
      "learning_rate": 0.0002308252427184466,
      "loss": 0.6241,
      "step": 115
    },
    {
      "epoch": 1.064367816091954,
      "grad_norm": 0.6095510125160217,
      "learning_rate": 0.00023009708737864074,
      "loss": 0.6274,
      "step": 116
    },
    {
      "epoch": 1.0735632183908046,
      "grad_norm": 0.9384357929229736,
      "learning_rate": 0.00022936893203883492,
      "loss": 0.7682,
      "step": 117
    },
    {
      "epoch": 1.0827586206896551,
      "grad_norm": 0.5237877368927002,
      "learning_rate": 0.0002286407766990291,
      "loss": 0.8889,
      "step": 118
    },
    {
      "epoch": 1.0919540229885056,
      "grad_norm": 0.5225788354873657,
      "learning_rate": 0.00022791262135922328,
      "loss": 0.6419,
      "step": 119
    },
    {
      "epoch": 1.1011494252873564,
      "grad_norm": 0.6210270524024963,
      "learning_rate": 0.00022718446601941746,
      "loss": 0.6799,
      "step": 120
    },
    {
      "epoch": 1.110344827586207,
      "grad_norm": 0.7171874046325684,
      "learning_rate": 0.00022645631067961164,
      "loss": 0.5037,
      "step": 121
    },
    {
      "epoch": 1.1195402298850574,
      "grad_norm": 0.6145285367965698,
      "learning_rate": 0.00022572815533980582,
      "loss": 0.4812,
      "step": 122
    },
    {
      "epoch": 1.1287356321839082,
      "grad_norm": 0.5306028723716736,
      "learning_rate": 0.000225,
      "loss": 0.6068,
      "step": 123
    },
    {
      "epoch": 1.1379310344827587,
      "grad_norm": 0.6142969131469727,
      "learning_rate": 0.00022427184466019415,
      "loss": 0.6441,
      "step": 124
    },
    {
      "epoch": 1.1471264367816092,
      "grad_norm": 0.5693908333778381,
      "learning_rate": 0.00022354368932038832,
      "loss": 0.6241,
      "step": 125
    },
    {
      "epoch": 1.1563218390804597,
      "grad_norm": 0.8560084104537964,
      "learning_rate": 0.0002228155339805825,
      "loss": 0.5208,
      "step": 126
    },
    {
      "epoch": 1.1655172413793102,
      "grad_norm": 0.9754599928855896,
      "learning_rate": 0.00022208737864077668,
      "loss": 0.6499,
      "step": 127
    },
    {
      "epoch": 1.174712643678161,
      "grad_norm": 0.515574038028717,
      "learning_rate": 0.00022135922330097086,
      "loss": 0.6246,
      "step": 128
    },
    {
      "epoch": 1.1839080459770115,
      "grad_norm": 0.5477547645568848,
      "learning_rate": 0.00022063106796116504,
      "loss": 0.6331,
      "step": 129
    },
    {
      "epoch": 1.193103448275862,
      "grad_norm": 0.445388525724411,
      "learning_rate": 0.00021990291262135922,
      "loss": 0.5737,
      "step": 130
    },
    {
      "epoch": 1.2022988505747128,
      "grad_norm": 0.6278632879257202,
      "learning_rate": 0.00021917475728155337,
      "loss": 0.5605,
      "step": 131
    },
    {
      "epoch": 1.2114942528735633,
      "grad_norm": 0.5176573991775513,
      "learning_rate": 0.00021844660194174755,
      "loss": 0.6198,
      "step": 132
    },
    {
      "epoch": 1.2206896551724138,
      "grad_norm": 0.5394790768623352,
      "learning_rate": 0.00021771844660194173,
      "loss": 0.743,
      "step": 133
    },
    {
      "epoch": 1.2298850574712643,
      "grad_norm": 0.5462550520896912,
      "learning_rate": 0.0002169902912621359,
      "loss": 0.5505,
      "step": 134
    },
    {
      "epoch": 1.2390804597701148,
      "grad_norm": 0.5793837904930115,
      "learning_rate": 0.00021626213592233008,
      "loss": 0.7134,
      "step": 135
    },
    {
      "epoch": 1.2482758620689656,
      "grad_norm": 0.5995808243751526,
      "learning_rate": 0.00021553398058252426,
      "loss": 0.8151,
      "step": 136
    },
    {
      "epoch": 1.257471264367816,
      "grad_norm": 0.6317359805107117,
      "learning_rate": 0.00021480582524271844,
      "loss": 0.4986,
      "step": 137
    },
    {
      "epoch": 1.2666666666666666,
      "grad_norm": 0.9133898019790649,
      "learning_rate": 0.00021407766990291262,
      "loss": 0.6029,
      "step": 138
    },
    {
      "epoch": 1.2758620689655173,
      "grad_norm": 0.7161931991577148,
      "learning_rate": 0.00021334951456310677,
      "loss": 0.6581,
      "step": 139
    },
    {
      "epoch": 1.2850574712643679,
      "grad_norm": 0.5639025568962097,
      "learning_rate": 0.00021262135922330095,
      "loss": 0.681,
      "step": 140
    },
    {
      "epoch": 1.2942528735632184,
      "grad_norm": 0.6325567364692688,
      "learning_rate": 0.00021189320388349513,
      "loss": 0.7994,
      "step": 141
    },
    {
      "epoch": 1.303448275862069,
      "grad_norm": 0.47429075837135315,
      "learning_rate": 0.0002111650485436893,
      "loss": 0.4164,
      "step": 142
    },
    {
      "epoch": 1.3126436781609194,
      "grad_norm": 0.3774986267089844,
      "learning_rate": 0.00021043689320388349,
      "loss": 0.3792,
      "step": 143
    },
    {
      "epoch": 1.3218390804597702,
      "grad_norm": 0.5024625062942505,
      "learning_rate": 0.00020970873786407766,
      "loss": 0.6999,
      "step": 144
    },
    {
      "epoch": 1.3310344827586207,
      "grad_norm": 0.4836028516292572,
      "learning_rate": 0.00020898058252427184,
      "loss": 0.3536,
      "step": 145
    },
    {
      "epoch": 1.3402298850574712,
      "grad_norm": 0.4562912881374359,
      "learning_rate": 0.00020825242718446602,
      "loss": 0.4362,
      "step": 146
    },
    {
      "epoch": 1.349425287356322,
      "grad_norm": 0.4715615212917328,
      "learning_rate": 0.00020752427184466017,
      "loss": 0.4743,
      "step": 147
    },
    {
      "epoch": 1.3586206896551725,
      "grad_norm": 0.5050966143608093,
      "learning_rate": 0.00020679611650485435,
      "loss": 0.6084,
      "step": 148
    },
    {
      "epoch": 1.367816091954023,
      "grad_norm": 0.5919803380966187,
      "learning_rate": 0.00020606796116504853,
      "loss": 0.4208,
      "step": 149
    },
    {
      "epoch": 1.3770114942528735,
      "grad_norm": 0.5397422313690186,
      "learning_rate": 0.0002053398058252427,
      "loss": 0.5182,
      "step": 150
    },
    {
      "epoch": 1.386206896551724,
      "grad_norm": 0.604860246181488,
      "learning_rate": 0.0002046116504854369,
      "loss": 0.6569,
      "step": 151
    },
    {
      "epoch": 1.3954022988505748,
      "grad_norm": 0.6743022799491882,
      "learning_rate": 0.00020388349514563107,
      "loss": 0.6063,
      "step": 152
    },
    {
      "epoch": 1.4045977011494253,
      "grad_norm": 0.5582085847854614,
      "learning_rate": 0.00020315533980582524,
      "loss": 0.8471,
      "step": 153
    },
    {
      "epoch": 1.4137931034482758,
      "grad_norm": 0.6764629483222961,
      "learning_rate": 0.00020242718446601942,
      "loss": 0.4767,
      "step": 154
    },
    {
      "epoch": 1.4229885057471265,
      "grad_norm": 0.39126965403556824,
      "learning_rate": 0.00020169902912621357,
      "loss": 0.4983,
      "step": 155
    },
    {
      "epoch": 1.432183908045977,
      "grad_norm": 0.5407236814498901,
      "learning_rate": 0.00020097087378640775,
      "loss": 0.6467,
      "step": 156
    },
    {
      "epoch": 1.4413793103448276,
      "grad_norm": 0.4321889579296112,
      "learning_rate": 0.00020024271844660193,
      "loss": 0.6037,
      "step": 157
    },
    {
      "epoch": 1.450574712643678,
      "grad_norm": 0.3570482134819031,
      "learning_rate": 0.0001995145631067961,
      "loss": 0.4515,
      "step": 158
    },
    {
      "epoch": 1.4597701149425286,
      "grad_norm": 0.5193243622779846,
      "learning_rate": 0.0001987864077669903,
      "loss": 0.5267,
      "step": 159
    },
    {
      "epoch": 1.4689655172413794,
      "grad_norm": 0.8264741897583008,
      "learning_rate": 0.00019805825242718447,
      "loss": 0.7169,
      "step": 160
    },
    {
      "epoch": 1.4781609195402299,
      "grad_norm": 0.6514953374862671,
      "learning_rate": 0.00019733009708737865,
      "loss": 0.678,
      "step": 161
    },
    {
      "epoch": 1.4873563218390804,
      "grad_norm": 0.5475180745124817,
      "learning_rate": 0.0001966019417475728,
      "loss": 0.5252,
      "step": 162
    },
    {
      "epoch": 1.4965517241379311,
      "grad_norm": 0.49964120984077454,
      "learning_rate": 0.00019587378640776698,
      "loss": 0.4259,
      "step": 163
    },
    {
      "epoch": 1.5057471264367817,
      "grad_norm": 0.4474540948867798,
      "learning_rate": 0.00019514563106796116,
      "loss": 0.4728,
      "step": 164
    },
    {
      "epoch": 1.5149425287356322,
      "grad_norm": 0.5726771950721741,
      "learning_rate": 0.00019441747572815533,
      "loss": 0.6752,
      "step": 165
    },
    {
      "epoch": 1.524137931034483,
      "grad_norm": 0.5038064122200012,
      "learning_rate": 0.0001936893203883495,
      "loss": 0.647,
      "step": 166
    },
    {
      "epoch": 1.5333333333333332,
      "grad_norm": 0.4093747138977051,
      "learning_rate": 0.0001929611650485437,
      "loss": 0.6077,
      "step": 167
    },
    {
      "epoch": 1.542528735632184,
      "grad_norm": 0.8166248798370361,
      "learning_rate": 0.00019223300970873787,
      "loss": 0.5149,
      "step": 168
    },
    {
      "epoch": 1.5517241379310345,
      "grad_norm": 0.5660980343818665,
      "learning_rate": 0.00019150485436893205,
      "loss": 0.3511,
      "step": 169
    },
    {
      "epoch": 1.560919540229885,
      "grad_norm": 0.403187096118927,
      "learning_rate": 0.0001907766990291262,
      "loss": 0.4097,
      "step": 170
    },
    {
      "epoch": 1.5701149425287357,
      "grad_norm": 0.5686673521995544,
      "learning_rate": 0.00019004854368932038,
      "loss": 0.8236,
      "step": 171
    },
    {
      "epoch": 1.5793103448275863,
      "grad_norm": 0.4967772662639618,
      "learning_rate": 0.00018932038834951456,
      "loss": 0.562,
      "step": 172
    },
    {
      "epoch": 1.5885057471264368,
      "grad_norm": 0.560854434967041,
      "learning_rate": 0.00018859223300970874,
      "loss": 0.5852,
      "step": 173
    },
    {
      "epoch": 1.5977011494252875,
      "grad_norm": 0.3643392324447632,
      "learning_rate": 0.00018786407766990291,
      "loss": 0.4042,
      "step": 174
    },
    {
      "epoch": 1.6068965517241378,
      "grad_norm": 0.6362044811248779,
      "learning_rate": 0.00018713592233009707,
      "loss": 0.6533,
      "step": 175
    },
    {
      "epoch": 1.6160919540229886,
      "grad_norm": 0.6190036535263062,
      "learning_rate": 0.00018640776699029122,
      "loss": 0.7651,
      "step": 176
    },
    {
      "epoch": 1.625287356321839,
      "grad_norm": 0.3463480472564697,
      "learning_rate": 0.0001856796116504854,
      "loss": 0.3312,
      "step": 177
    },
    {
      "epoch": 1.6344827586206896,
      "grad_norm": 0.2819209098815918,
      "learning_rate": 0.00018495145631067957,
      "loss": 0.3247,
      "step": 178
    },
    {
      "epoch": 1.6436781609195403,
      "grad_norm": 0.5651117563247681,
      "learning_rate": 0.00018422330097087375,
      "loss": 0.7674,
      "step": 179
    },
    {
      "epoch": 1.6528735632183909,
      "grad_norm": 0.4948618412017822,
      "learning_rate": 0.00018349514563106793,
      "loss": 0.6161,
      "step": 180
    },
    {
      "epoch": 1.6620689655172414,
      "grad_norm": 0.43636301159858704,
      "learning_rate": 0.0001827669902912621,
      "loss": 0.5334,
      "step": 181
    },
    {
      "epoch": 1.6712643678160921,
      "grad_norm": 0.4951108694076538,
      "learning_rate": 0.0001820388349514563,
      "loss": 0.624,
      "step": 182
    },
    {
      "epoch": 1.6804597701149424,
      "grad_norm": 0.5951234102249146,
      "learning_rate": 0.00018131067961165047,
      "loss": 0.5184,
      "step": 183
    },
    {
      "epoch": 1.6896551724137931,
      "grad_norm": 0.6109154224395752,
      "learning_rate": 0.00018058252427184462,
      "loss": 0.7274,
      "step": 184
    },
    {
      "epoch": 1.6988505747126437,
      "grad_norm": 0.4492969810962677,
      "learning_rate": 0.0001798543689320388,
      "loss": 0.6079,
      "step": 185
    },
    {
      "epoch": 1.7080459770114942,
      "grad_norm": 0.5195210576057434,
      "learning_rate": 0.00017912621359223298,
      "loss": 0.4998,
      "step": 186
    },
    {
      "epoch": 1.717241379310345,
      "grad_norm": 0.49724170565605164,
      "learning_rate": 0.00017839805825242716,
      "loss": 0.3856,
      "step": 187
    },
    {
      "epoch": 1.7264367816091954,
      "grad_norm": 1.1214869022369385,
      "learning_rate": 0.00017766990291262133,
      "loss": 0.4785,
      "step": 188
    },
    {
      "epoch": 1.735632183908046,
      "grad_norm": 0.5645748376846313,
      "learning_rate": 0.0001769417475728155,
      "loss": 0.5791,
      "step": 189
    },
    {
      "epoch": 1.7448275862068967,
      "grad_norm": 0.46523571014404297,
      "learning_rate": 0.0001762135922330097,
      "loss": 0.5804,
      "step": 190
    },
    {
      "epoch": 1.754022988505747,
      "grad_norm": 0.3765566945075989,
      "learning_rate": 0.00017548543689320387,
      "loss": 0.5272,
      "step": 191
    },
    {
      "epoch": 1.7632183908045977,
      "grad_norm": 0.5119166374206543,
      "learning_rate": 0.00017475728155339802,
      "loss": 0.6572,
      "step": 192
    },
    {
      "epoch": 1.7724137931034483,
      "grad_norm": 0.38700059056282043,
      "learning_rate": 0.0001740291262135922,
      "loss": 0.5233,
      "step": 193
    },
    {
      "epoch": 1.7816091954022988,
      "grad_norm": 0.3980446457862854,
      "learning_rate": 0.00017330097087378638,
      "loss": 0.4071,
      "step": 194
    },
    {
      "epoch": 1.7908045977011495,
      "grad_norm": 0.35074886679649353,
      "learning_rate": 0.00017257281553398056,
      "loss": 0.4505,
      "step": 195
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.5284190773963928,
      "learning_rate": 0.00017184466019417474,
      "loss": 0.5048,
      "step": 196
    },
    {
      "epoch": 1.8091954022988506,
      "grad_norm": 0.47595924139022827,
      "learning_rate": 0.00017111650485436891,
      "loss": 0.541,
      "step": 197
    },
    {
      "epoch": 1.8183908045977013,
      "grad_norm": 0.557465672492981,
      "learning_rate": 0.0001703883495145631,
      "loss": 0.5175,
      "step": 198
    },
    {
      "epoch": 1.8275862068965516,
      "grad_norm": 0.4417920410633087,
      "learning_rate": 0.00016966019417475724,
      "loss": 0.5284,
      "step": 199
    },
    {
      "epoch": 1.8367816091954023,
      "grad_norm": 0.34410127997398376,
      "learning_rate": 0.00016893203883495142,
      "loss": 0.3723,
      "step": 200
    },
    {
      "epoch": 1.8459770114942529,
      "grad_norm": 0.3989458680152893,
      "learning_rate": 0.0001682038834951456,
      "loss": 0.4057,
      "step": 201
    },
    {
      "epoch": 1.8551724137931034,
      "grad_norm": 0.3975292444229126,
      "learning_rate": 0.00016747572815533978,
      "loss": 0.4334,
      "step": 202
    },
    {
      "epoch": 1.8643678160919541,
      "grad_norm": 0.5099373459815979,
      "learning_rate": 0.00016674757281553396,
      "loss": 0.5842,
      "step": 203
    },
    {
      "epoch": 1.8735632183908046,
      "grad_norm": 0.4445691406726837,
      "learning_rate": 0.00016601941747572814,
      "loss": 0.593,
      "step": 204
    },
    {
      "epoch": 1.8827586206896552,
      "grad_norm": 0.4758138060569763,
      "learning_rate": 0.00016529126213592232,
      "loss": 0.4163,
      "step": 205
    },
    {
      "epoch": 1.891954022988506,
      "grad_norm": 0.41732391715049744,
      "learning_rate": 0.0001645631067961165,
      "loss": 0.5405,
      "step": 206
    },
    {
      "epoch": 1.9011494252873562,
      "grad_norm": 0.3908286988735199,
      "learning_rate": 0.00016383495145631065,
      "loss": 0.4363,
      "step": 207
    },
    {
      "epoch": 1.910344827586207,
      "grad_norm": 0.5812026858329773,
      "learning_rate": 0.00016310679611650483,
      "loss": 0.7188,
      "step": 208
    },
    {
      "epoch": 1.9195402298850575,
      "grad_norm": 0.4734458327293396,
      "learning_rate": 0.000162378640776699,
      "loss": 0.569,
      "step": 209
    },
    {
      "epoch": 1.928735632183908,
      "grad_norm": 0.4347914457321167,
      "learning_rate": 0.00016165048543689318,
      "loss": 0.5026,
      "step": 210
    },
    {
      "epoch": 1.9379310344827587,
      "grad_norm": 0.3364557921886444,
      "learning_rate": 0.00016092233009708736,
      "loss": 0.3537,
      "step": 211
    },
    {
      "epoch": 1.9471264367816092,
      "grad_norm": 0.44029518961906433,
      "learning_rate": 0.00016019417475728154,
      "loss": 0.5733,
      "step": 212
    },
    {
      "epoch": 1.9563218390804598,
      "grad_norm": 0.33010566234588623,
      "learning_rate": 0.00015946601941747572,
      "loss": 0.3838,
      "step": 213
    },
    {
      "epoch": 1.9655172413793105,
      "grad_norm": 0.3367745876312256,
      "learning_rate": 0.0001587378640776699,
      "loss": 0.4212,
      "step": 214
    },
    {
      "epoch": 1.9747126436781608,
      "grad_norm": 0.5834444165229797,
      "learning_rate": 0.00015800970873786405,
      "loss": 0.833,
      "step": 215
    },
    {
      "epoch": 1.9839080459770115,
      "grad_norm": 0.5451297163963318,
      "learning_rate": 0.00015728155339805823,
      "loss": 0.7753,
      "step": 216
    },
    {
      "epoch": 1.993103448275862,
      "grad_norm": 0.4916711747646332,
      "learning_rate": 0.0001565533980582524,
      "loss": 0.6183,
      "step": 217
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.7540034651756287,
      "learning_rate": 0.00015582524271844658,
      "loss": 0.8286,
      "step": 218
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.4519011974334717,
      "eval_runtime": 93.8599,
      "eval_samples_per_second": 3.537,
      "eval_steps_per_second": 1.769,
      "step": 218
    }
  ],
  "logging_steps": 1,
  "max_steps": 432,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.9491566965342618e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}