{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 15.886524822695035,
  "eval_steps": 80,
  "global_step": 840,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.6052009456264775,
      "grad_norm": 897.57861328125,
      "learning_rate": 1.9047619047619045e-07,
      "log_odds_chosen": 0.061996445059776306,
      "log_odds_ratio": -0.7339106202125549,
      "logits/chosen": -2.475102663040161,
      "logits/rejected": -2.5303642749786377,
      "logps/chosen": -1.3030776977539062,
      "logps/rejected": -1.351835012435913,
      "loss": 1.9448,
      "nll_loss": 1.881751298904419,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.13030776381492615,
      "rewards/margins": 0.004875739570707083,
      "rewards/rejected": -0.13518351316452026,
      "step": 32
    },
    {
      "epoch": 1.210401891252955,
      "grad_norm": 555.6674194335938,
      "learning_rate": 3.809523809523809e-07,
      "log_odds_chosen": 0.13086628913879395,
      "log_odds_ratio": -0.6972255110740662,
      "logits/chosen": -2.408938407897949,
      "logits/rejected": -2.4563820362091064,
      "logps/chosen": -1.2012869119644165,
      "logps/rejected": -1.3022348880767822,
      "loss": 1.5253,
      "nll_loss": 1.4454330205917358,
      "rewards/accuracies": 0.54296875,
      "rewards/chosen": -0.12012868374586105,
      "rewards/margins": 0.010094808414578438,
      "rewards/rejected": -0.13022349774837494,
      "step": 64
    },
    {
      "epoch": 1.5130023640661938,
      "eval_log_odds_chosen": 1.2037408351898193,
      "eval_log_odds_ratio": -0.2748129367828369,
      "eval_logits/chosen": -2.1409010887145996,
      "eval_logits/rejected": -2.1931569576263428,
      "eval_logps/chosen": -1.156149983406067,
      "eval_logps/rejected": -2.1107430458068848,
      "eval_loss": 1.3948438167572021,
      "eval_nll_loss": 1.5358692407608032,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": -0.11561501026153564,
      "eval_rewards/margins": 0.09545929729938507,
      "eval_rewards/rejected": -0.21107430756092072,
      "eval_runtime": 0.8754,
      "eval_samples_per_second": 156.493,
      "eval_steps_per_second": 5.711,
      "step": 80
    },
    {
      "epoch": 1.8156028368794326,
      "grad_norm": 140.96612548828125,
      "learning_rate": 4.996892303047305e-07,
      "log_odds_chosen": 0.16402098536491394,
      "log_odds_ratio": -0.6756913065910339,
      "logits/chosen": -2.3939661979675293,
      "logits/rejected": -2.389753580093384,
      "logps/chosen": -1.0995960235595703,
      "logps/rejected": -1.2302087545394897,
      "loss": 1.3931,
      "nll_loss": 1.3130543231964111,
      "rewards/accuracies": 0.49609375,
      "rewards/chosen": -0.10995960980653763,
      "rewards/margins": 0.013061259873211384,
      "rewards/rejected": -0.12302087247371674,
      "step": 96
    },
    {
      "epoch": 2.42080378250591,
      "grad_norm": 3005.20654296875,
      "learning_rate": 4.958326378681848e-07,
      "log_odds_chosen": 0.05211365222930908,
      "log_odds_ratio": -0.7710955142974854,
      "logits/chosen": -2.4226865768432617,
      "logits/rejected": -2.4471077919006348,
      "logps/chosen": -1.8894121646881104,
      "logps/rejected": -1.878553867340088,
      "loss": 1.9751,
      "nll_loss": 1.9949692487716675,
      "rewards/accuracies": 0.54296875,
      "rewards/chosen": -0.18894124031066895,
      "rewards/margins": -0.001085837371647358,
      "rewards/rejected": -0.18785539269447327,
      "step": 128
    },
    {
      "epoch": 3.0260047281323876,
      "grad_norm": 3593.66064453125,
      "learning_rate": 4.876353872369572e-07,
      "log_odds_chosen": 0.010831637308001518,
      "log_odds_ratio": -0.8205243349075317,
      "logits/chosen": -2.4603629112243652,
      "logits/rejected": -2.4731788635253906,
      "logps/chosen": -1.9289910793304443,
      "logps/rejected": -1.854127049446106,
      "loss": 2.0756,
      "nll_loss": 2.116929769515991,
      "rewards/accuracies": 0.58203125,
      "rewards/chosen": -0.19289910793304443,
      "rewards/margins": -0.0074864043854177,
      "rewards/rejected": -0.1854127049446106,
      "step": 160
    },
    {
      "epoch": 3.0260047281323876,
      "eval_log_odds_chosen": 1.280719518661499,
      "eval_log_odds_ratio": -0.25084003806114197,
      "eval_logits/chosen": -2.156606912612915,
      "eval_logits/rejected": -2.2219834327697754,
      "eval_logps/chosen": -1.4854581356048584,
      "eval_logps/rejected": -2.5444798469543457,
      "eval_loss": 1.3283345699310303,
      "eval_nll_loss": 1.4989588260650635,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": -0.14854581654071808,
      "eval_rewards/margins": 0.10590219497680664,
      "eval_rewards/rejected": -0.2544480264186859,
      "eval_runtime": 0.8785,
      "eval_samples_per_second": 155.943,
      "eval_steps_per_second": 5.691,
      "step": 160
    },
    {
      "epoch": 3.631205673758865,
      "grad_norm": 817.2037353515625,
      "learning_rate": 4.752422169756047e-07,
      "log_odds_chosen": 0.09074901789426804,
      "log_odds_ratio": -0.7456185817718506,
      "logits/chosen": -2.377356767654419,
      "logits/rejected": -2.396003007888794,
      "logps/chosen": -1.5171489715576172,
      "logps/rejected": -1.5251379013061523,
      "loss": 1.9051,
      "nll_loss": 1.6593117713928223,
      "rewards/accuracies": 0.609375,
      "rewards/chosen": -0.15171489119529724,
      "rewards/margins": 0.0007988963043317199,
      "rewards/rejected": -0.152513787150383,
      "step": 192
    },
    {
      "epoch": 4.236406619385343,
      "grad_norm": 954.6674194335938,
      "learning_rate": 4.588719528532341e-07,
      "log_odds_chosen": 0.1411646008491516,
      "log_odds_ratio": -0.6900860667228699,
      "logits/chosen": -2.398102283477783,
      "logits/rejected": -2.397972345352173,
      "logps/chosen": -1.2695732116699219,
      "logps/rejected": -1.3286174535751343,
      "loss": 1.4204,
      "nll_loss": 1.3868590593338013,
      "rewards/accuracies": 0.6171875,
      "rewards/chosen": -0.12695731222629547,
      "rewards/margins": 0.005904428660869598,
      "rewards/rejected": -0.13286174833774567,
      "step": 224
    },
    {
      "epoch": 4.539007092198582,
      "eval_log_odds_chosen": 1.1990762948989868,
      "eval_log_odds_ratio": -0.2697806656360626,
      "eval_logits/chosen": -2.137376546859741,
      "eval_logits/rejected": -2.1972498893737793,
      "eval_logps/chosen": -1.2540639638900757,
      "eval_logps/rejected": -2.2160115242004395,
      "eval_loss": 1.2844356298446655,
      "eval_nll_loss": 1.4172712564468384,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": -0.125406414270401,
      "eval_rewards/margins": 0.09619472920894623,
      "eval_rewards/rejected": -0.22160112857818604,
      "eval_runtime": 0.8664,
      "eval_samples_per_second": 158.119,
      "eval_steps_per_second": 5.771,
      "step": 240
    },
    {
      "epoch": 4.84160756501182,
      "grad_norm": 790.6442260742188,
      "learning_rate": 4.3881364404463375e-07,
      "log_odds_chosen": 0.20751571655273438,
      "log_odds_ratio": -0.6614270210266113,
      "logits/chosen": -2.3498642444610596,
      "logits/rejected": -2.370640993118286,
      "logps/chosen": -1.1192173957824707,
      "logps/rejected": -1.2252520322799683,
      "loss": 1.3469,
      "nll_loss": 1.2267839908599854,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -0.1119217574596405,
      "rewards/margins": 0.010603459551930428,
      "rewards/rejected": -0.12252521514892578,
      "step": 256
    },
    {
      "epoch": 5.446808510638298,
      "grad_norm": 2515.4189453125,
      "learning_rate": 4.154214593992149e-07,
      "log_odds_chosen": 0.23377765715122223,
      "log_odds_ratio": -0.6729075312614441,
      "logits/chosen": -2.322608709335327,
      "logits/rejected": -2.361389636993408,
      "logps/chosen": -1.1726882457733154,
      "logps/rejected": -1.2837783098220825,
      "loss": 1.3539,
      "nll_loss": 1.2735731601715088,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.11726883798837662,
      "rewards/margins": 0.011109001003205776,
      "rewards/rejected": -0.12837782502174377,
      "step": 288
    },
    {
      "epoch": 6.052009456264775,
      "grad_norm": 2093.776611328125,
      "learning_rate": 3.891084338941603e-07,
      "log_odds_chosen": 0.16962425410747528,
      "log_odds_ratio": -0.6696641445159912,
      "logits/chosen": -2.325108051300049,
      "logits/rejected": -2.3817710876464844,
      "logps/chosen": -3.6265933513641357,
      "logps/rejected": -3.700042724609375,
      "loss": 3.6173,
      "nll_loss": 3.7216219902038574,
      "rewards/accuracies": 0.59765625,
      "rewards/chosen": -0.3626593351364136,
      "rewards/margins": 0.007344960235059261,
      "rewards/rejected": -0.3700042963027954,
      "step": 320
    },
    {
      "epoch": 6.052009456264775,
      "eval_log_odds_chosen": 1.1228582859039307,
      "eval_log_odds_ratio": -0.2914997637271881,
      "eval_logits/chosen": -2.153041362762451,
      "eval_logits/rejected": -2.239081621170044,
      "eval_logps/chosen": -1.1402614116668701,
      "eval_logps/rejected": -2.0236728191375732,
      "eval_loss": 1.2484513521194458,
      "eval_nll_loss": 1.3337957859039307,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": -0.11402615159749985,
      "eval_rewards/margins": 0.0883411318063736,
      "eval_rewards/rejected": -0.20236727595329285,
      "eval_runtime": 0.8835,
      "eval_samples_per_second": 155.057,
      "eval_steps_per_second": 5.659,
      "step": 320
    },
    {
      "epoch": 6.657210401891253,
      "grad_norm": 750.7427978515625,
      "learning_rate": 3.6033917569043597e-07,
      "log_odds_chosen": 0.2158849686384201,
      "log_odds_ratio": -0.651162326335907,
      "logits/chosen": -2.2999160289764404,
      "logits/rejected": -2.3155159950256348,
      "logps/chosen": -3.3152918815612793,
      "logps/rejected": -3.4116926193237305,
      "loss": 3.4506,
      "nll_loss": 3.4377260208129883,
      "rewards/accuracies": 0.6015625,
      "rewards/chosen": -0.3315292000770569,
      "rewards/margins": 0.00964003149420023,
      "rewards/rejected": -0.34116923809051514,
      "step": 352
    },
    {
      "epoch": 7.26241134751773,
      "grad_norm": 466.0474548339844,
      "learning_rate": 3.296216625629211e-07,
      "log_odds_chosen": 0.2518257200717926,
      "log_odds_ratio": -0.6292858123779297,
      "logits/chosen": -2.287289619445801,
      "logits/rejected": -2.274383783340454,
      "logps/chosen": -2.936006784439087,
      "logps/rejected": -3.0706114768981934,
      "loss": 3.1836,
      "nll_loss": 3.031456708908081,
      "rewards/accuracies": 0.66796875,
      "rewards/chosen": -0.2936007082462311,
      "rewards/margins": 0.013460462912917137,
      "rewards/rejected": -0.30706116557121277,
      "step": 384
    },
    {
      "epoch": 7.5650118203309695,
      "eval_log_odds_chosen": 1.1787246465682983,
      "eval_log_odds_ratio": -0.27878421545028687,
      "eval_logits/chosen": -2.131922721862793,
      "eval_logits/rejected": -2.198315143585205,
      "eval_logps/chosen": -1.1629152297973633,
      "eval_logps/rejected": -2.102142810821533,
      "eval_loss": 1.2289972305297852,
      "eval_nll_loss": 1.3089702129364014,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": -0.11629153788089752,
      "eval_rewards/margins": 0.09392273426055908,
      "eval_rewards/rejected": -0.2102142572402954,
      "eval_runtime": 0.8657,
      "eval_samples_per_second": 158.25,
      "eval_steps_per_second": 5.776,
      "step": 400
    },
    {
      "epoch": 7.867612293144208,
      "grad_norm": 493.2022399902344,
      "learning_rate": 2.974982725547975e-07,
      "log_odds_chosen": 0.29160410165786743,
      "log_odds_ratio": -0.6114708781242371,
      "logits/chosen": -2.296574115753174,
      "logits/rejected": -2.3063693046569824,
      "logps/chosen": -2.907156229019165,
      "logps/rejected": -3.049989938735962,
      "loss": 3.0885,
      "nll_loss": 2.9950599670410156,
      "rewards/accuracies": 0.6953125,
      "rewards/chosen": -0.29071560502052307,
      "rewards/margins": 0.014283367432653904,
      "rewards/rejected": -0.3049989938735962,
      "step": 416
    },
    {
      "epoch": 8.472813238770685,
      "grad_norm": 2084.139892578125,
      "learning_rate": 2.6453620722761895e-07,
      "log_odds_chosen": 0.2739107012748718,
      "log_odds_ratio": -0.6295269727706909,
      "logits/chosen": -2.3001277446746826,
      "logits/rejected": -2.2884907722473145,
      "logps/chosen": -2.9699883460998535,
      "logps/rejected": -3.114020586013794,
      "loss": 2.9983,
      "nll_loss": 3.031224012374878,
      "rewards/accuracies": 0.65234375,
      "rewards/chosen": -0.29699885845184326,
      "rewards/margins": 0.014403235167264938,
      "rewards/rejected": -0.3114020824432373,
      "step": 448
    },
    {
      "epoch": 9.078014184397164,
      "grad_norm": 557.6774291992188,
      "learning_rate": 2.3131747660339394e-07,
      "log_odds_chosen": 0.2703976333141327,
      "log_odds_ratio": -0.6236827969551086,
      "logits/chosen": -2.2672348022460938,
      "logits/rejected": -2.2586584091186523,
      "logps/chosen": -2.937666893005371,
      "logps/rejected": -3.061203718185425,
      "loss": 2.8082,
      "nll_loss": 3.023472785949707,
      "rewards/accuracies": 0.66015625,
      "rewards/chosen": -0.29376670718193054,
      "rewards/margins": 0.012353670783340931,
      "rewards/rejected": -0.306120365858078,
      "step": 480
    },
    {
      "epoch": 9.078014184397164,
      "eval_log_odds_chosen": 1.1980304718017578,
      "eval_log_odds_ratio": -0.27382025122642517,
      "eval_logits/chosen": -2.1204967498779297,
      "eval_logits/rejected": -2.1800942420959473,
      "eval_logps/chosen": -1.192492961883545,
      "eval_logps/rejected": -2.1554245948791504,
      "eval_loss": 1.2367494106292725,
      "eval_nll_loss": 1.3177238702774048,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": -0.11924929916858673,
      "eval_rewards/margins": 0.09629315137863159,
      "eval_rewards/rejected": -0.21554246544837952,
      "eval_runtime": 0.88,
      "eval_samples_per_second": 155.689,
      "eval_steps_per_second": 5.682,
      "step": 480
    },
    {
      "epoch": 9.68321513002364,
      "grad_norm": 8620.671875,
      "learning_rate": 1.984286226342056e-07,
      "log_odds_chosen": 0.36868974566459656,
      "log_odds_ratio": -0.6002693176269531,
      "logits/chosen": -2.237966537475586,
      "logits/rejected": -2.2450058460235596,
      "logps/chosen": -2.536555290222168,
      "logps/rejected": -2.738464117050171,
      "loss": 2.7562,
      "nll_loss": 2.642591714859009,
      "rewards/accuracies": 0.69921875,
      "rewards/chosen": -0.2536555230617523,
      "rewards/margins": 0.020190902054309845,
      "rewards/rejected": -0.27384641766548157,
      "step": 512
    },
    {
      "epoch": 10.288416075650119,
      "grad_norm": 8913.7607421875,
      "learning_rate": 1.6645036265170313e-07,
      "log_odds_chosen": 0.23036888241767883,
      "log_odds_ratio": -0.6965319514274597,
      "logits/chosen": -2.346311092376709,
      "logits/rejected": -2.3196349143981934,
      "logps/chosen": -2.625997543334961,
      "logps/rejected": -2.695284605026245,
      "loss": 2.9109,
      "nll_loss": 2.6460041999816895,
      "rewards/accuracies": 0.68359375,
      "rewards/chosen": -0.26259979605674744,
      "rewards/margins": 0.006928655784577131,
      "rewards/rejected": -0.26952844858169556,
      "step": 544
    },
    {
      "epoch": 10.591016548463356,
      "eval_log_odds_chosen": 1.2137528657913208,
      "eval_log_odds_ratio": -0.2704525589942932,
      "eval_logits/chosen": -2.1178054809570312,
      "eval_logits/rejected": -2.1774165630340576,
      "eval_logps/chosen": -1.1941485404968262,
      "eval_logps/rejected": -2.171353340148926,
      "eval_loss": 1.237461805343628,
      "eval_nll_loss": 1.3179538249969482,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": -0.11941485106945038,
      "eval_rewards/margins": 0.09772048145532608,
      "eval_rewards/rejected": -0.21713533997535706,
      "eval_runtime": 0.8664,
      "eval_samples_per_second": 158.121,
      "eval_steps_per_second": 5.771,
      "step": 560
    },
    {
      "epoch": 10.893617021276595,
      "grad_norm": 11410.7939453125,
      "learning_rate": 1.3594733566170925e-07,
      "log_odds_chosen": 0.2925941050052643,
      "log_odds_ratio": -0.6866809725761414,
      "logits/chosen": -2.3280415534973145,
      "logits/rejected": -2.308394432067871,
      "logps/chosen": -3.2026498317718506,
      "logps/rejected": -3.3342018127441406,
      "loss": 2.9544,
      "nll_loss": 3.251168966293335,
      "rewards/accuracies": 0.66796875,
      "rewards/chosen": -0.320264995098114,
      "rewards/margins": 0.013155205175280571,
      "rewards/rejected": -0.33342018723487854,
      "step": 576
    },
    {
      "epoch": 11.498817966903074,
      "grad_norm": 2123.895751953125,
      "learning_rate": 1.0745813253325956e-07,
      "log_odds_chosen": 0.3092188239097595,
      "log_odds_ratio": -0.6492509245872498,
      "logits/chosen": -2.3580808639526367,
      "logits/rejected": -2.349421501159668,
      "logps/chosen": -2.5302317142486572,
      "logps/rejected": -2.662865161895752,
      "loss": 2.8523,
      "nll_loss": 2.578503131866455,
      "rewards/accuracies": 0.6953125,
      "rewards/chosen": -0.2530231475830078,
      "rewards/margins": 0.013263333588838577,
      "rewards/rejected": -0.2662864923477173,
      "step": 608
    },
    {
      "epoch": 12.10401891252955,
      "grad_norm": 1731.5615234375,
      "learning_rate": 8.148578611867113e-08,
      "log_odds_chosen": 0.3393189013004303,
      "log_odds_ratio": -0.6164168119430542,
      "logits/chosen": -2.1805524826049805,
      "logits/rejected": -2.177432060241699,
      "logps/chosen": -2.5276594161987305,
      "logps/rejected": -2.710268497467041,
      "loss": 2.5512,
      "nll_loss": 2.681882381439209,
      "rewards/accuracies": 0.67578125,
      "rewards/chosen": -0.252765953540802,
      "rewards/margins": 0.01826086826622486,
      "rewards/rejected": -0.2710268199443817,
      "step": 640
    },
    {
      "epoch": 12.10401891252955,
      "eval_log_odds_chosen": 1.2131071090698242,
      "eval_log_odds_ratio": -0.270443856716156,
      "eval_logits/chosen": -2.1185622215270996,
      "eval_logits/rejected": -2.178537368774414,
      "eval_logps/chosen": -1.198697566986084,
      "eval_logps/rejected": -2.176114559173584,
      "eval_loss": 1.2388056516647339,
      "eval_nll_loss": 1.3213987350463867,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": -0.11986975371837616,
      "eval_rewards/margins": 0.09774170815944672,
      "eval_rewards/rejected": -0.21761147677898407,
      "eval_runtime": 0.8825,
      "eval_samples_per_second": 155.235,
      "eval_steps_per_second": 5.666,
      "step": 640
    },
    {
      "epoch": 12.709219858156029,
      "grad_norm": 1997.690185546875,
      "learning_rate": 5.848888922025552e-08,
      "log_odds_chosen": 0.3624497354030609,
      "log_odds_ratio": -0.6123137474060059,
      "logits/chosen": -2.179229259490967,
      "logits/rejected": -2.1755523681640625,
      "logps/chosen": -2.486222743988037,
      "logps/rejected": -2.6808714866638184,
      "loss": 2.5628,
      "nll_loss": 2.6199562549591064,
      "rewards/accuracies": 0.69140625,
      "rewards/chosen": -0.24862225353717804,
      "rewards/margins": 0.019464917480945587,
      "rewards/rejected": -0.2680871784687042,
      "step": 672
    },
    {
      "epoch": 13.314420803782506,
      "grad_norm": 2036.525146484375,
      "learning_rate": 3.887349723342303e-08,
      "log_odds_chosen": 0.3463588356971741,
      "log_odds_ratio": -0.6327537298202515,
      "logits/chosen": -2.181072235107422,
      "logits/rejected": -2.1947262287139893,
      "logps/chosen": -2.517810344696045,
      "logps/rejected": -2.672647476196289,
      "loss": 2.6212,
      "nll_loss": 2.6852023601531982,
      "rewards/accuracies": 0.68359375,
      "rewards/chosen": -0.25178101658821106,
      "rewards/margins": 0.01548372209072113,
      "rewards/rejected": -0.2672647535800934,
      "step": 704
    },
    {
      "epoch": 13.617021276595745,
      "eval_log_odds_chosen": 1.2199119329452515,
      "eval_log_odds_ratio": -0.26896363496780396,
      "eval_logits/chosen": -2.1166138648986816,
      "eval_logits/rejected": -2.1762003898620605,
      "eval_logps/chosen": -1.1962625980377197,
      "eval_logps/rejected": -2.1790993213653564,
      "eval_loss": 1.2387369871139526,
      "eval_nll_loss": 1.3203083276748657,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": -0.11962626129388809,
      "eval_rewards/margins": 0.09828367829322815,
      "eval_rewards/rejected": -0.21790993213653564,
      "eval_runtime": 0.8708,
      "eval_samples_per_second": 157.334,
      "eval_steps_per_second": 5.742,
      "step": 720
    },
    {
      "epoch": 13.919621749408984,
      "grad_norm": 9910.3740234375,
      "learning_rate": 2.298595844092377e-08,
      "log_odds_chosen": 0.3617098927497864,
      "log_odds_ratio": -0.6060731410980225,
      "logits/chosen": -2.2685229778289795,
      "logits/rejected": -2.2752606868743896,
      "logps/chosen": -1.9047422409057617,
      "logps/rejected": -2.0876576900482178,
      "loss": 2.4957,
      "nll_loss": 1.975754737854004,
      "rewards/accuracies": 0.71484375,
      "rewards/chosen": -0.19047421216964722,
      "rewards/margins": 0.01829155907034874,
      "rewards/rejected": -0.20876577496528625,
      "step": 736
    },
    {
      "epoch": 14.52482269503546,
      "grad_norm": 1543.000244140625,
      "learning_rate": 1.1106798553464802e-08,
      "log_odds_chosen": 0.42522603273391724,
      "log_odds_ratio": -0.5653746128082275,
      "logits/chosen": -2.353919744491577,
      "logits/rejected": -2.358372688293457,
      "logps/chosen": -1.2913402318954468,
      "logps/rejected": -1.542799711227417,
      "loss": 1.4582,
      "nll_loss": 1.3932266235351562,
      "rewards/accuracies": 0.765625,
      "rewards/chosen": -0.12913402915000916,
      "rewards/margins": 0.025145962834358215,
      "rewards/rejected": -0.15427997708320618,
      "step": 768
    },
    {
      "epoch": 15.130023640661939,
      "grad_norm": 698.0999755859375,
      "learning_rate": 3.4457674771554422e-09,
      "log_odds_chosen": 0.4467349052429199,
      "log_odds_ratio": -0.545281171798706,
      "logits/chosen": -2.313391923904419,
      "logits/rejected": -2.3118624687194824,
      "logps/chosen": -1.2114390134811401,
      "logps/rejected": -1.4863505363464355,
      "loss": 1.3504,
      "nll_loss": 1.3252184391021729,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -0.1211438924074173,
      "rewards/margins": 0.027491170912981033,
      "rewards/rejected": -0.14863505959510803,
      "step": 800
    },
    {
      "epoch": 15.130023640661939,
      "eval_log_odds_chosen": 1.211981177330017,
      "eval_log_odds_ratio": -0.27068275213241577,
      "eval_logits/chosen": -2.118680715560913,
      "eval_logits/rejected": -2.1784884929656982,
      "eval_logps/chosen": -1.1996212005615234,
      "eval_logps/rejected": -2.176278829574585,
      "eval_loss": 1.2384228706359863,
      "eval_nll_loss": 1.3189568519592285,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": -0.11996213346719742,
      "eval_rewards/margins": 0.09766574203968048,
      "eval_rewards/rejected": -0.2176278829574585,
      "eval_runtime": 0.8764,
      "eval_samples_per_second": 156.329,
      "eval_steps_per_second": 5.705,
      "step": 800
    },
    {
      "epoch": 15.735224586288416,
      "grad_norm": 295.0424499511719,
      "learning_rate": 1.3813576683111006e-10,
      "log_odds_chosen": 0.44846177101135254,
      "log_odds_ratio": -0.5449205636978149,
      "logits/chosen": -2.3085861206054688,
      "logits/rejected": -2.3130688667297363,
      "logps/chosen": -1.1487438678741455,
      "logps/rejected": -1.4232044219970703,
      "loss": 1.3316,
      "nll_loss": 1.246992588043213,
      "rewards/accuracies": 0.74609375,
      "rewards/chosen": -0.11487438529729843,
      "rewards/margins": 0.027446046471595764,
      "rewards/rejected": -0.142320454120636,
      "step": 832
    },
    {
      "epoch": 15.886524822695035,
      "grad_norm": 305.3218078613281,
      "learning_rate": 0.0,
      "log_odds_chosen": 0.500209391117096,
      "log_odds_ratio": -0.5302451848983765,
      "logits/chosen": -2.2818732261657715,
      "logits/rejected": -2.2850182056427,
      "logps/chosen": -1.1465669870376587,
      "logps/rejected": -1.4646430015563965,
      "loss": 1.3265,
      "nll_loss": 1.2768977880477905,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -0.11465670168399811,
      "rewards/margins": 0.03180759772658348,
      "rewards/rejected": -0.1464642882347107,
      "step": 840
    },
    {
      "epoch": 15.886524822695035,
      "eval_log_odds_chosen": 1.2168288230895996,
      "eval_log_odds_ratio": -0.26950639486312866,
      "eval_logits/chosen": -2.1189827919006348,
      "eval_logits/rejected": -2.1787045001983643,
      "eval_logps/chosen": -1.1971455812454224,
      "eval_logps/rejected": -2.1773040294647217,
      "eval_loss": 1.2378294467926025,
      "eval_nll_loss": 1.3174165487289429,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": -0.11971455812454224,
      "eval_rewards/margins": 0.09801585972309113,
      "eval_rewards/rejected": -0.21773043274879456,
      "eval_runtime": 0.8739,
      "eval_samples_per_second": 156.768,
      "eval_steps_per_second": 5.721,
      "step": 840
    }
  ],
  "logging_steps": 32,
  "max_steps": 840,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 17,
  "save_steps": 80,
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}