|
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 15.886524822695035,
|
|
"eval_steps": 80,
|
|
"global_step": 840,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.6052009456264775,
|
|
"grad_norm": 897.57861328125,
|
|
"learning_rate": 1.9047619047619045e-07,
|
|
"log_odds_chosen": 0.061996445059776306,
|
|
"log_odds_ratio": -0.7339106202125549,
|
|
"logits/chosen": -2.475102663040161,
|
|
"logits/rejected": -2.5303642749786377,
|
|
"logps/chosen": -1.3030776977539062,
|
|
"logps/rejected": -1.351835012435913,
|
|
"loss": 1.9448,
|
|
"nll_loss": 1.881751298904419,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.13030776381492615,
|
|
"rewards/margins": 0.004875739570707083,
|
|
"rewards/rejected": -0.13518351316452026,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 1.210401891252955,
|
|
"grad_norm": 555.6674194335938,
|
|
"learning_rate": 3.809523809523809e-07,
|
|
"log_odds_chosen": 0.13086628913879395,
|
|
"log_odds_ratio": -0.6972255110740662,
|
|
"logits/chosen": -2.408938407897949,
|
|
"logits/rejected": -2.4563820362091064,
|
|
"logps/chosen": -1.2012869119644165,
|
|
"logps/rejected": -1.3022348880767822,
|
|
"loss": 1.5253,
|
|
"nll_loss": 1.4454330205917358,
|
|
"rewards/accuracies": 0.54296875,
|
|
"rewards/chosen": -0.12012868374586105,
|
|
"rewards/margins": 0.010094808414578438,
|
|
"rewards/rejected": -0.13022349774837494,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 1.5130023640661938,
|
|
"eval_log_odds_chosen": 1.2037408351898193,
|
|
"eval_log_odds_ratio": -0.2748129367828369,
|
|
"eval_logits/chosen": -2.1409010887145996,
|
|
"eval_logits/rejected": -2.1931569576263428,
|
|
"eval_logps/chosen": -1.156149983406067,
|
|
"eval_logps/rejected": -2.1107430458068848,
|
|
"eval_loss": 1.3948438167572021,
|
|
"eval_nll_loss": 1.5358692407608032,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.11561501026153564,
|
|
"eval_rewards/margins": 0.09545929729938507,
|
|
"eval_rewards/rejected": -0.21107430756092072,
|
|
"eval_runtime": 0.8754,
|
|
"eval_samples_per_second": 156.493,
|
|
"eval_steps_per_second": 5.711,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 1.8156028368794326,
|
|
"grad_norm": 140.96612548828125,
|
|
"learning_rate": 4.996892303047305e-07,
|
|
"log_odds_chosen": 0.16402098536491394,
|
|
"log_odds_ratio": -0.6756913065910339,
|
|
"logits/chosen": -2.3939661979675293,
|
|
"logits/rejected": -2.389753580093384,
|
|
"logps/chosen": -1.0995960235595703,
|
|
"logps/rejected": -1.2302087545394897,
|
|
"loss": 1.3931,
|
|
"nll_loss": 1.3130543231964111,
|
|
"rewards/accuracies": 0.49609375,
|
|
"rewards/chosen": -0.10995960980653763,
|
|
"rewards/margins": 0.013061259873211384,
|
|
"rewards/rejected": -0.12302087247371674,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 2.42080378250591,
|
|
"grad_norm": 3005.20654296875,
|
|
"learning_rate": 4.958326378681848e-07,
|
|
"log_odds_chosen": 0.05211365222930908,
|
|
"log_odds_ratio": -0.7710955142974854,
|
|
"logits/chosen": -2.4226865768432617,
|
|
"logits/rejected": -2.4471077919006348,
|
|
"logps/chosen": -1.8894121646881104,
|
|
"logps/rejected": -1.878553867340088,
|
|
"loss": 1.9751,
|
|
"nll_loss": 1.9949692487716675,
|
|
"rewards/accuracies": 0.54296875,
|
|
"rewards/chosen": -0.18894124031066895,
|
|
"rewards/margins": -0.001085837371647358,
|
|
"rewards/rejected": -0.18785539269447327,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 3.0260047281323876,
|
|
"grad_norm": 3593.66064453125,
|
|
"learning_rate": 4.876353872369572e-07,
|
|
"log_odds_chosen": 0.010831637308001518,
|
|
"log_odds_ratio": -0.8205243349075317,
|
|
"logits/chosen": -2.4603629112243652,
|
|
"logits/rejected": -2.4731788635253906,
|
|
"logps/chosen": -1.9289910793304443,
|
|
"logps/rejected": -1.854127049446106,
|
|
"loss": 2.0756,
|
|
"nll_loss": 2.116929769515991,
|
|
"rewards/accuracies": 0.58203125,
|
|
"rewards/chosen": -0.19289910793304443,
|
|
"rewards/margins": -0.0074864043854177,
|
|
"rewards/rejected": -0.1854127049446106,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 3.0260047281323876,
|
|
"eval_log_odds_chosen": 1.280719518661499,
|
|
"eval_log_odds_ratio": -0.25084003806114197,
|
|
"eval_logits/chosen": -2.156606912612915,
|
|
"eval_logits/rejected": -2.2219834327697754,
|
|
"eval_logps/chosen": -1.4854581356048584,
|
|
"eval_logps/rejected": -2.5444798469543457,
|
|
"eval_loss": 1.3283345699310303,
|
|
"eval_nll_loss": 1.4989588260650635,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.14854581654071808,
|
|
"eval_rewards/margins": 0.10590219497680664,
|
|
"eval_rewards/rejected": -0.2544480264186859,
|
|
"eval_runtime": 0.8785,
|
|
"eval_samples_per_second": 155.943,
|
|
"eval_steps_per_second": 5.691,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 3.631205673758865,
|
|
"grad_norm": 817.2037353515625,
|
|
"learning_rate": 4.752422169756047e-07,
|
|
"log_odds_chosen": 0.09074901789426804,
|
|
"log_odds_ratio": -0.7456185817718506,
|
|
"logits/chosen": -2.377356767654419,
|
|
"logits/rejected": -2.396003007888794,
|
|
"logps/chosen": -1.5171489715576172,
|
|
"logps/rejected": -1.5251379013061523,
|
|
"loss": 1.9051,
|
|
"nll_loss": 1.6593117713928223,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": -0.15171489119529724,
|
|
"rewards/margins": 0.0007988963043317199,
|
|
"rewards/rejected": -0.152513787150383,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 4.236406619385343,
|
|
"grad_norm": 954.6674194335938,
|
|
"learning_rate": 4.588719528532341e-07,
|
|
"log_odds_chosen": 0.1411646008491516,
|
|
"log_odds_ratio": -0.6900860667228699,
|
|
"logits/chosen": -2.398102283477783,
|
|
"logits/rejected": -2.397972345352173,
|
|
"logps/chosen": -1.2695732116699219,
|
|
"logps/rejected": -1.3286174535751343,
|
|
"loss": 1.4204,
|
|
"nll_loss": 1.3868590593338013,
|
|
"rewards/accuracies": 0.6171875,
|
|
"rewards/chosen": -0.12695731222629547,
|
|
"rewards/margins": 0.005904428660869598,
|
|
"rewards/rejected": -0.13286174833774567,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 4.539007092198582,
|
|
"eval_log_odds_chosen": 1.1990762948989868,
|
|
"eval_log_odds_ratio": -0.2697806656360626,
|
|
"eval_logits/chosen": -2.137376546859741,
|
|
"eval_logits/rejected": -2.1972498893737793,
|
|
"eval_logps/chosen": -1.2540639638900757,
|
|
"eval_logps/rejected": -2.2160115242004395,
|
|
"eval_loss": 1.2844356298446655,
|
|
"eval_nll_loss": 1.4172712564468384,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.125406414270401,
|
|
"eval_rewards/margins": 0.09619472920894623,
|
|
"eval_rewards/rejected": -0.22160112857818604,
|
|
"eval_runtime": 0.8664,
|
|
"eval_samples_per_second": 158.119,
|
|
"eval_steps_per_second": 5.771,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 4.84160756501182,
|
|
"grad_norm": 790.6442260742188,
|
|
"learning_rate": 4.3881364404463375e-07,
|
|
"log_odds_chosen": 0.20751571655273438,
|
|
"log_odds_ratio": -0.6614270210266113,
|
|
"logits/chosen": -2.3498642444610596,
|
|
"logits/rejected": -2.370640993118286,
|
|
"logps/chosen": -1.1192173957824707,
|
|
"logps/rejected": -1.2252520322799683,
|
|
"loss": 1.3469,
|
|
"nll_loss": 1.2267839908599854,
|
|
"rewards/accuracies": 0.65625,
|
|
"rewards/chosen": -0.1119217574596405,
|
|
"rewards/margins": 0.010603459551930428,
|
|
"rewards/rejected": -0.12252521514892578,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 5.446808510638298,
|
|
"grad_norm": 2515.4189453125,
|
|
"learning_rate": 4.154214593992149e-07,
|
|
"log_odds_chosen": 0.23377765715122223,
|
|
"log_odds_ratio": -0.6729075312614441,
|
|
"logits/chosen": -2.322608709335327,
|
|
"logits/rejected": -2.361389636993408,
|
|
"logps/chosen": -1.1726882457733154,
|
|
"logps/rejected": -1.2837783098220825,
|
|
"loss": 1.3539,
|
|
"nll_loss": 1.2735731601715088,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.11726883798837662,
|
|
"rewards/margins": 0.011109001003205776,
|
|
"rewards/rejected": -0.12837782502174377,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 6.052009456264775,
|
|
"grad_norm": 2093.776611328125,
|
|
"learning_rate": 3.891084338941603e-07,
|
|
"log_odds_chosen": 0.16962425410747528,
|
|
"log_odds_ratio": -0.6696641445159912,
|
|
"logits/chosen": -2.325108051300049,
|
|
"logits/rejected": -2.3817710876464844,
|
|
"logps/chosen": -3.6265933513641357,
|
|
"logps/rejected": -3.700042724609375,
|
|
"loss": 3.6173,
|
|
"nll_loss": 3.7216219902038574,
|
|
"rewards/accuracies": 0.59765625,
|
|
"rewards/chosen": -0.3626593351364136,
|
|
"rewards/margins": 0.007344960235059261,
|
|
"rewards/rejected": -0.3700042963027954,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 6.052009456264775,
|
|
"eval_log_odds_chosen": 1.1228582859039307,
|
|
"eval_log_odds_ratio": -0.2914997637271881,
|
|
"eval_logits/chosen": -2.153041362762451,
|
|
"eval_logits/rejected": -2.239081621170044,
|
|
"eval_logps/chosen": -1.1402614116668701,
|
|
"eval_logps/rejected": -2.0236728191375732,
|
|
"eval_loss": 1.2484513521194458,
|
|
"eval_nll_loss": 1.3337957859039307,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.11402615159749985,
|
|
"eval_rewards/margins": 0.0883411318063736,
|
|
"eval_rewards/rejected": -0.20236727595329285,
|
|
"eval_runtime": 0.8835,
|
|
"eval_samples_per_second": 155.057,
|
|
"eval_steps_per_second": 5.659,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 6.657210401891253,
|
|
"grad_norm": 750.7427978515625,
|
|
"learning_rate": 3.6033917569043597e-07,
|
|
"log_odds_chosen": 0.2158849686384201,
|
|
"log_odds_ratio": -0.651162326335907,
|
|
"logits/chosen": -2.2999160289764404,
|
|
"logits/rejected": -2.3155159950256348,
|
|
"logps/chosen": -3.3152918815612793,
|
|
"logps/rejected": -3.4116926193237305,
|
|
"loss": 3.4506,
|
|
"nll_loss": 3.4377260208129883,
|
|
"rewards/accuracies": 0.6015625,
|
|
"rewards/chosen": -0.3315292000770569,
|
|
"rewards/margins": 0.00964003149420023,
|
|
"rewards/rejected": -0.34116923809051514,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 7.26241134751773,
|
|
"grad_norm": 466.0474548339844,
|
|
"learning_rate": 3.296216625629211e-07,
|
|
"log_odds_chosen": 0.2518257200717926,
|
|
"log_odds_ratio": -0.6292858123779297,
|
|
"logits/chosen": -2.287289619445801,
|
|
"logits/rejected": -2.274383783340454,
|
|
"logps/chosen": -2.936006784439087,
|
|
"logps/rejected": -3.0706114768981934,
|
|
"loss": 3.1836,
|
|
"nll_loss": 3.031456708908081,
|
|
"rewards/accuracies": 0.66796875,
|
|
"rewards/chosen": -0.2936007082462311,
|
|
"rewards/margins": 0.013460462912917137,
|
|
"rewards/rejected": -0.30706116557121277,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 7.5650118203309695,
|
|
"eval_log_odds_chosen": 1.1787246465682983,
|
|
"eval_log_odds_ratio": -0.27878421545028687,
|
|
"eval_logits/chosen": -2.131922721862793,
|
|
"eval_logits/rejected": -2.198315143585205,
|
|
"eval_logps/chosen": -1.1629152297973633,
|
|
"eval_logps/rejected": -2.102142810821533,
|
|
"eval_loss": 1.2289972305297852,
|
|
"eval_nll_loss": 1.3089702129364014,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.11629153788089752,
|
|
"eval_rewards/margins": 0.09392273426055908,
|
|
"eval_rewards/rejected": -0.2102142572402954,
|
|
"eval_runtime": 0.8657,
|
|
"eval_samples_per_second": 158.25,
|
|
"eval_steps_per_second": 5.776,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 7.867612293144208,
|
|
"grad_norm": 493.2022399902344,
|
|
"learning_rate": 2.974982725547975e-07,
|
|
"log_odds_chosen": 0.29160410165786743,
|
|
"log_odds_ratio": -0.6114708781242371,
|
|
"logits/chosen": -2.296574115753174,
|
|
"logits/rejected": -2.3063693046569824,
|
|
"logps/chosen": -2.907156229019165,
|
|
"logps/rejected": -3.049989938735962,
|
|
"loss": 3.0885,
|
|
"nll_loss": 2.9950599670410156,
|
|
"rewards/accuracies": 0.6953125,
|
|
"rewards/chosen": -0.29071560502052307,
|
|
"rewards/margins": 0.014283367432653904,
|
|
"rewards/rejected": -0.3049989938735962,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 8.472813238770685,
|
|
"grad_norm": 2084.139892578125,
|
|
"learning_rate": 2.6453620722761895e-07,
|
|
"log_odds_chosen": 0.2739107012748718,
|
|
"log_odds_ratio": -0.6295269727706909,
|
|
"logits/chosen": -2.3001277446746826,
|
|
"logits/rejected": -2.2884907722473145,
|
|
"logps/chosen": -2.9699883460998535,
|
|
"logps/rejected": -3.114020586013794,
|
|
"loss": 2.9983,
|
|
"nll_loss": 3.031224012374878,
|
|
"rewards/accuracies": 0.65234375,
|
|
"rewards/chosen": -0.29699885845184326,
|
|
"rewards/margins": 0.014403235167264938,
|
|
"rewards/rejected": -0.3114020824432373,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 9.078014184397164,
|
|
"grad_norm": 557.6774291992188,
|
|
"learning_rate": 2.3131747660339394e-07,
|
|
"log_odds_chosen": 0.2703976333141327,
|
|
"log_odds_ratio": -0.6236827969551086,
|
|
"logits/chosen": -2.2672348022460938,
|
|
"logits/rejected": -2.2586584091186523,
|
|
"logps/chosen": -2.937666893005371,
|
|
"logps/rejected": -3.061203718185425,
|
|
"loss": 2.8082,
|
|
"nll_loss": 3.023472785949707,
|
|
"rewards/accuracies": 0.66015625,
|
|
"rewards/chosen": -0.29376670718193054,
|
|
"rewards/margins": 0.012353670783340931,
|
|
"rewards/rejected": -0.306120365858078,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 9.078014184397164,
|
|
"eval_log_odds_chosen": 1.1980304718017578,
|
|
"eval_log_odds_ratio": -0.27382025122642517,
|
|
"eval_logits/chosen": -2.1204967498779297,
|
|
"eval_logits/rejected": -2.1800942420959473,
|
|
"eval_logps/chosen": -1.192492961883545,
|
|
"eval_logps/rejected": -2.1554245948791504,
|
|
"eval_loss": 1.2367494106292725,
|
|
"eval_nll_loss": 1.3177238702774048,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.11924929916858673,
|
|
"eval_rewards/margins": 0.09629315137863159,
|
|
"eval_rewards/rejected": -0.21554246544837952,
|
|
"eval_runtime": 0.88,
|
|
"eval_samples_per_second": 155.689,
|
|
"eval_steps_per_second": 5.682,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 9.68321513002364,
|
|
"grad_norm": 8620.671875,
|
|
"learning_rate": 1.984286226342056e-07,
|
|
"log_odds_chosen": 0.36868974566459656,
|
|
"log_odds_ratio": -0.6002693176269531,
|
|
"logits/chosen": -2.237966537475586,
|
|
"logits/rejected": -2.2450058460235596,
|
|
"logps/chosen": -2.536555290222168,
|
|
"logps/rejected": -2.738464117050171,
|
|
"loss": 2.7562,
|
|
"nll_loss": 2.642591714859009,
|
|
"rewards/accuracies": 0.69921875,
|
|
"rewards/chosen": -0.2536555230617523,
|
|
"rewards/margins": 0.020190902054309845,
|
|
"rewards/rejected": -0.27384641766548157,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 10.288416075650119,
|
|
"grad_norm": 8913.7607421875,
|
|
"learning_rate": 1.6645036265170313e-07,
|
|
"log_odds_chosen": 0.23036888241767883,
|
|
"log_odds_ratio": -0.6965319514274597,
|
|
"logits/chosen": -2.346311092376709,
|
|
"logits/rejected": -2.3196349143981934,
|
|
"logps/chosen": -2.625997543334961,
|
|
"logps/rejected": -2.695284605026245,
|
|
"loss": 2.9109,
|
|
"nll_loss": 2.6460041999816895,
|
|
"rewards/accuracies": 0.68359375,
|
|
"rewards/chosen": -0.26259979605674744,
|
|
"rewards/margins": 0.006928655784577131,
|
|
"rewards/rejected": -0.26952844858169556,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 10.591016548463356,
|
|
"eval_log_odds_chosen": 1.2137528657913208,
|
|
"eval_log_odds_ratio": -0.2704525589942932,
|
|
"eval_logits/chosen": -2.1178054809570312,
|
|
"eval_logits/rejected": -2.1774165630340576,
|
|
"eval_logps/chosen": -1.1941485404968262,
|
|
"eval_logps/rejected": -2.171353340148926,
|
|
"eval_loss": 1.237461805343628,
|
|
"eval_nll_loss": 1.3179538249969482,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.11941485106945038,
|
|
"eval_rewards/margins": 0.09772048145532608,
|
|
"eval_rewards/rejected": -0.21713533997535706,
|
|
"eval_runtime": 0.8664,
|
|
"eval_samples_per_second": 158.121,
|
|
"eval_steps_per_second": 5.771,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 10.893617021276595,
|
|
"grad_norm": 11410.7939453125,
|
|
"learning_rate": 1.3594733566170925e-07,
|
|
"log_odds_chosen": 0.2925941050052643,
|
|
"log_odds_ratio": -0.6866809725761414,
|
|
"logits/chosen": -2.3280415534973145,
|
|
"logits/rejected": -2.308394432067871,
|
|
"logps/chosen": -3.2026498317718506,
|
|
"logps/rejected": -3.3342018127441406,
|
|
"loss": 2.9544,
|
|
"nll_loss": 3.251168966293335,
|
|
"rewards/accuracies": 0.66796875,
|
|
"rewards/chosen": -0.320264995098114,
|
|
"rewards/margins": 0.013155205175280571,
|
|
"rewards/rejected": -0.33342018723487854,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 11.498817966903074,
|
|
"grad_norm": 2123.895751953125,
|
|
"learning_rate": 1.0745813253325956e-07,
|
|
"log_odds_chosen": 0.3092188239097595,
|
|
"log_odds_ratio": -0.6492509245872498,
|
|
"logits/chosen": -2.3580808639526367,
|
|
"logits/rejected": -2.349421501159668,
|
|
"logps/chosen": -2.5302317142486572,
|
|
"logps/rejected": -2.662865161895752,
|
|
"loss": 2.8523,
|
|
"nll_loss": 2.578503131866455,
|
|
"rewards/accuracies": 0.6953125,
|
|
"rewards/chosen": -0.2530231475830078,
|
|
"rewards/margins": 0.013263333588838577,
|
|
"rewards/rejected": -0.2662864923477173,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 12.10401891252955,
|
|
"grad_norm": 1731.5615234375,
|
|
"learning_rate": 8.148578611867113e-08,
|
|
"log_odds_chosen": 0.3393189013004303,
|
|
"log_odds_ratio": -0.6164168119430542,
|
|
"logits/chosen": -2.1805524826049805,
|
|
"logits/rejected": -2.177432060241699,
|
|
"logps/chosen": -2.5276594161987305,
|
|
"logps/rejected": -2.710268497467041,
|
|
"loss": 2.5512,
|
|
"nll_loss": 2.681882381439209,
|
|
"rewards/accuracies": 0.67578125,
|
|
"rewards/chosen": -0.252765953540802,
|
|
"rewards/margins": 0.01826086826622486,
|
|
"rewards/rejected": -0.2710268199443817,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 12.10401891252955,
|
|
"eval_log_odds_chosen": 1.2131071090698242,
|
|
"eval_log_odds_ratio": -0.270443856716156,
|
|
"eval_logits/chosen": -2.1185622215270996,
|
|
"eval_logits/rejected": -2.178537368774414,
|
|
"eval_logps/chosen": -1.198697566986084,
|
|
"eval_logps/rejected": -2.176114559173584,
|
|
"eval_loss": 1.2388056516647339,
|
|
"eval_nll_loss": 1.3213987350463867,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.11986975371837616,
|
|
"eval_rewards/margins": 0.09774170815944672,
|
|
"eval_rewards/rejected": -0.21761147677898407,
|
|
"eval_runtime": 0.8825,
|
|
"eval_samples_per_second": 155.235,
|
|
"eval_steps_per_second": 5.666,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 12.709219858156029,
|
|
"grad_norm": 1997.690185546875,
|
|
"learning_rate": 5.848888922025552e-08,
|
|
"log_odds_chosen": 0.3624497354030609,
|
|
"log_odds_ratio": -0.6123137474060059,
|
|
"logits/chosen": -2.179229259490967,
|
|
"logits/rejected": -2.1755523681640625,
|
|
"logps/chosen": -2.486222743988037,
|
|
"logps/rejected": -2.6808714866638184,
|
|
"loss": 2.5628,
|
|
"nll_loss": 2.6199562549591064,
|
|
"rewards/accuracies": 0.69140625,
|
|
"rewards/chosen": -0.24862225353717804,
|
|
"rewards/margins": 0.019464917480945587,
|
|
"rewards/rejected": -0.2680871784687042,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 13.314420803782506,
|
|
"grad_norm": 2036.525146484375,
|
|
"learning_rate": 3.887349723342303e-08,
|
|
"log_odds_chosen": 0.3463588356971741,
|
|
"log_odds_ratio": -0.6327537298202515,
|
|
"logits/chosen": -2.181072235107422,
|
|
"logits/rejected": -2.1947262287139893,
|
|
"logps/chosen": -2.517810344696045,
|
|
"logps/rejected": -2.672647476196289,
|
|
"loss": 2.6212,
|
|
"nll_loss": 2.6852023601531982,
|
|
"rewards/accuracies": 0.68359375,
|
|
"rewards/chosen": -0.25178101658821106,
|
|
"rewards/margins": 0.01548372209072113,
|
|
"rewards/rejected": -0.2672647535800934,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 13.617021276595745,
|
|
"eval_log_odds_chosen": 1.2199119329452515,
|
|
"eval_log_odds_ratio": -0.26896363496780396,
|
|
"eval_logits/chosen": -2.1166138648986816,
|
|
"eval_logits/rejected": -2.1762003898620605,
|
|
"eval_logps/chosen": -1.1962625980377197,
|
|
"eval_logps/rejected": -2.1790993213653564,
|
|
"eval_loss": 1.2387369871139526,
|
|
"eval_nll_loss": 1.3203083276748657,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.11962626129388809,
|
|
"eval_rewards/margins": 0.09828367829322815,
|
|
"eval_rewards/rejected": -0.21790993213653564,
|
|
"eval_runtime": 0.8708,
|
|
"eval_samples_per_second": 157.334,
|
|
"eval_steps_per_second": 5.742,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 13.919621749408984,
|
|
"grad_norm": 9910.3740234375,
|
|
"learning_rate": 2.298595844092377e-08,
|
|
"log_odds_chosen": 0.3617098927497864,
|
|
"log_odds_ratio": -0.6060731410980225,
|
|
"logits/chosen": -2.2685229778289795,
|
|
"logits/rejected": -2.2752606868743896,
|
|
"logps/chosen": -1.9047422409057617,
|
|
"logps/rejected": -2.0876576900482178,
|
|
"loss": 2.4957,
|
|
"nll_loss": 1.975754737854004,
|
|
"rewards/accuracies": 0.71484375,
|
|
"rewards/chosen": -0.19047421216964722,
|
|
"rewards/margins": 0.01829155907034874,
|
|
"rewards/rejected": -0.20876577496528625,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 14.52482269503546,
|
|
"grad_norm": 1543.000244140625,
|
|
"learning_rate": 1.1106798553464802e-08,
|
|
"log_odds_chosen": 0.42522603273391724,
|
|
"log_odds_ratio": -0.5653746128082275,
|
|
"logits/chosen": -2.353919744491577,
|
|
"logits/rejected": -2.358372688293457,
|
|
"logps/chosen": -1.2913402318954468,
|
|
"logps/rejected": -1.542799711227417,
|
|
"loss": 1.4582,
|
|
"nll_loss": 1.3932266235351562,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": -0.12913402915000916,
|
|
"rewards/margins": 0.025145962834358215,
|
|
"rewards/rejected": -0.15427997708320618,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 15.130023640661939,
|
|
"grad_norm": 698.0999755859375,
|
|
"learning_rate": 3.4457674771554422e-09,
|
|
"log_odds_chosen": 0.4467349052429199,
|
|
"log_odds_ratio": -0.545281171798706,
|
|
"logits/chosen": -2.313391923904419,
|
|
"logits/rejected": -2.3118624687194824,
|
|
"logps/chosen": -1.2114390134811401,
|
|
"logps/rejected": -1.4863505363464355,
|
|
"loss": 1.3504,
|
|
"nll_loss": 1.3252184391021729,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.1211438924074173,
|
|
"rewards/margins": 0.027491170912981033,
|
|
"rewards/rejected": -0.14863505959510803,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 15.130023640661939,
|
|
"eval_log_odds_chosen": 1.211981177330017,
|
|
"eval_log_odds_ratio": -0.27068275213241577,
|
|
"eval_logits/chosen": -2.118680715560913,
|
|
"eval_logits/rejected": -2.1784884929656982,
|
|
"eval_logps/chosen": -1.1996212005615234,
|
|
"eval_logps/rejected": -2.176278829574585,
|
|
"eval_loss": 1.2384228706359863,
|
|
"eval_nll_loss": 1.3189568519592285,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.11996213346719742,
|
|
"eval_rewards/margins": 0.09766574203968048,
|
|
"eval_rewards/rejected": -0.2176278829574585,
|
|
"eval_runtime": 0.8764,
|
|
"eval_samples_per_second": 156.329,
|
|
"eval_steps_per_second": 5.705,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 15.735224586288416,
|
|
"grad_norm": 295.0424499511719,
|
|
"learning_rate": 1.3813576683111006e-10,
|
|
"log_odds_chosen": 0.44846177101135254,
|
|
"log_odds_ratio": -0.5449205636978149,
|
|
"logits/chosen": -2.3085861206054688,
|
|
"logits/rejected": -2.3130688667297363,
|
|
"logps/chosen": -1.1487438678741455,
|
|
"logps/rejected": -1.4232044219970703,
|
|
"loss": 1.3316,
|
|
"nll_loss": 1.246992588043213,
|
|
"rewards/accuracies": 0.74609375,
|
|
"rewards/chosen": -0.11487438529729843,
|
|
"rewards/margins": 0.027446046471595764,
|
|
"rewards/rejected": -0.142320454120636,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 15.886524822695035,
|
|
"grad_norm": 305.3218078613281,
|
|
"learning_rate": 0.0,
|
|
"log_odds_chosen": 0.500209391117096,
|
|
"log_odds_ratio": -0.5302451848983765,
|
|
"logits/chosen": -2.2818732261657715,
|
|
"logits/rejected": -2.2850182056427,
|
|
"logps/chosen": -1.1465669870376587,
|
|
"logps/rejected": -1.4646430015563965,
|
|
"loss": 1.3265,
|
|
"nll_loss": 1.2768977880477905,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.11465670168399811,
|
|
"rewards/margins": 0.03180759772658348,
|
|
"rewards/rejected": -0.1464642882347107,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 15.886524822695035,
|
|
"eval_log_odds_chosen": 1.2168288230895996,
|
|
"eval_log_odds_ratio": -0.26950639486312866,
|
|
"eval_logits/chosen": -2.1189827919006348,
|
|
"eval_logits/rejected": -2.1787045001983643,
|
|
"eval_logps/chosen": -1.1971455812454224,
|
|
"eval_logps/rejected": -2.1773040294647217,
|
|
"eval_loss": 1.2378294467926025,
|
|
"eval_nll_loss": 1.3174165487289429,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.11971455812454224,
|
|
"eval_rewards/margins": 0.09801585972309113,
|
|
"eval_rewards/rejected": -0.21773043274879456,
|
|
"eval_runtime": 0.8739,
|
|
"eval_samples_per_second": 156.768,
|
|
"eval_steps_per_second": 5.721,
|
|
"step": 840
|
|
}
|
|
],
|
|
"logging_steps": 32,
|
|
"max_steps": 840,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 17,
|
|
"save_steps": 80,
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|
|
|